blueloveTH před 8 měsíci
rodič
revize
be2aae493a
2 změnil soubory, kde provedl 140 přidání a 115 odebrání
  1. 4 8
      include/pocketpy/interpreter/types.h
  2. 136 107
      src/public/py_dict.c

+ 4 - 8
include/pocketpy/interpreter/types.h

@@ -3,26 +3,22 @@
 #include "pocketpy/common/vector.h"
 #include "pocketpy/objects/base.h"
 
-#define PK_DICT_MAX_COLLISION 4
-
 typedef struct {
+    // One key/value record stored in `Dict.entries`.
+    // `hash` caches the key's hash (the py_hash result cast to uint64_t); probing
+    // compares it before calling py_equal, and rehashing reuses it.
     uint64_t hash;
+    // A nil key marks a record that was popped but not yet compacted away.
     py_TValue key;
     py_TValue val;
 } DictEntry;
 
-typedef struct {
-    int _[PK_DICT_MAX_COLLISION];
-} DictIndex;
-
 typedef struct {
+    // Number of live key/value pairs (popped entries are not counted).
     int length;
+    // Number of slots in `indices`.
     uint32_t capacity;
-    DictIndex* indices;
+    // Slot table mapping a probe position to an index into `entries`.
+    // Holds `capacity` elements: uint16_t when `index_is_short` is true, uint32_t otherwise.
+    void* indices;
+    bool index_is_short;
+    // Sentinel stored in an unused slot (UINT16_MAX or UINT32_MAX, matching the slot width).
+    uint32_t null_index_value;
     c11_vector /*T=DictEntry*/ entries;
 } Dict;
 
 typedef c11_vector List;
 
 void c11_chunked_array2d__mark(void* ud, c11_vector* p_stack);
-void function__gc_mark(void* ud, c11_vector* p_stack);
+void function__gc_mark(void* ud, c11_vector* p_stack);

+ 136 - 107
src/public/py_dict.c

@@ -51,8 +51,6 @@ static uint32_t Dict__next_cap(uint32_t cap) {
     }
 }
 
-
-
 typedef struct {
     DictEntry* curr;
     DictEntry* end;
@@ -61,9 +59,24 @@ typedef struct {
 
 static void Dict__ctor(Dict* self, uint32_t capacity, int entries_capacity) {
+    // Initializes an empty dict with `capacity` index slots and storage reserved for
+    // `entries_capacity` entries. Every index slot starts out as `null_index_value`.
     self->length = 0;
-    self->capacity = capacity;
-    self->indices = PK_MALLOC(self->capacity * sizeof(DictIndex));
-    memset(self->indices, -1, self->capacity * sizeof(DictIndex));
+    self->capacity = capacity;  // the 1st prime
+
+    // Tables below the 16-bit limit use uint16_t slots; larger ones use uint32_t.
+    // Only `null_index_value` is recorded: `Dict` declares no other sentinel field,
+    // and deletion re-packs the probe chain instead of leaving a "deleted" marker.
+    size_t indices_size;
+    if(self->capacity < UINT16_MAX - 1) {
+        self->index_is_short = true;
+        indices_size = self->capacity * sizeof(uint16_t);
+        self->null_index_value = UINT16_MAX;
+    } else {
+        self->index_is_short = false;
+        indices_size = self->capacity * sizeof(uint32_t);
+        self->null_index_value = UINT32_MAX;
+    }
+
+    self->indices = PK_MALLOC(indices_size);
+    // all-ones bytes read back as UINT16_MAX / UINT32_MAX, i.e. `null_index_value`
+    memset(self->indices, -1, indices_size);
+
     c11_vector__ctor(&self->entries, sizeof(DictEntry));
     c11_vector__reserve(&self->entries, entries_capacity);
 }
@@ -75,65 +88,105 @@ static void Dict__dtor(Dict* self) {
     c11_vector__dtor(&self->entries);
 }
 
-static bool Dict__try_get(Dict* self, py_TValue* key, DictEntry** out) {
-    py_i64 hash;
-    if(!py_hash(key, &hash)) return false;
-    int idx = (uint64_t)hash % self->capacity;
-    for(int i = 0; i < PK_DICT_MAX_COLLISION; i++) {
-        int idx2 = self->indices[idx]._[i];
-        if(idx2 == -1) continue;
+/// Reads slot `index` of the index table; 16-bit slots are widened to uint32_t.
+static uint32_t Dict__get_index(Dict* self, uint32_t index) {
+    if(!self->index_is_short) return ((uint32_t*)self->indices)[index];
+    return ((uint16_t*)self->indices)[index];
+}
+
+/// Exchanges the contents of slots `x` and `y` of the index table.
+static void Dict__swap_index(Dict* self, uint32_t x, uint32_t y) {
+    if(!self->index_is_short) {
+        uint32_t* slots = self->indices;
+        uint32_t saved = slots[x];
+        slots[x] = slots[y];
+        slots[y] = saved;
+        return;
+    }
+    uint16_t* slots = self->indices;
+    uint16_t saved = slots[x];
+    slots[x] = slots[y];
+    slots[y] = saved;
+}
+
+/// Writes `value` into slot `index`; the value is narrowed to 16 bits for short tables.
+static void Dict__set_index(Dict* self, uint32_t index, uint32_t value) {
+    if(!self->index_is_short) {
+        ((uint32_t*)self->indices)[index] = value;
+        return;
+    }
+    ((uint16_t*)self->indices)[index] = (uint16_t)value;
+}
+
+/// Linear-probe lookup of `key`.
+/// Stores the key's hash in `*p_hash`. When the key is present, `*p_idx` is its slot
+/// in the index table and `*p_entry` its entry; otherwise `*p_idx` is the empty slot
+/// where the probe stopped and `*p_entry` is NULL.
+/// Returns false if `py_hash` fails or `py_equal` reports an error (-1); the output
+/// slot and entry are not written in that case.
+static bool
+    Dict__probe(Dict* self, py_TValue* key, py_i64* p_hash, uint32_t* p_idx, DictEntry** p_entry) {
+    if(!py_hash(key, p_hash)) return false;
+    py_i64 hash = *p_hash;
+    uint32_t idx = (uint64_t)hash % self->capacity;
+    const uint32_t max_idx = self->capacity - 1;
+    // The loop only exits on a match, an error, or an empty slot, so the table
+    // must always contain at least one empty slot.
+    while(true) {
+        uint32_t idx2 = Dict__get_index(self, idx);
+        if(idx2 == self->null_index_value) break;
         DictEntry* entry = c11__at(DictEntry, &self->entries, idx2);
         if(entry->hash == (uint64_t)hash) {
             int res = py_equal(&entry->key, key);
             if(res == 1) {
-                *out = entry;
+                *p_idx = idx;
+                *p_entry = entry;
                 return true;
             }
             if(res == -1) return false;  // error
         }
+        // try next index
+        idx = idx < max_idx ? idx + 1 : 0;
     }
-    *out = NULL;
+    // not found
+    *p_idx = idx;
+    *p_entry = NULL;
     return true;
 }
 
+/// Looks up `key`. On success `*out` is the matching entry, or NULL when the key is absent.
+/// Returns whatever `Dict__probe` returns; the hash and slot it reports are discarded.
+static bool Dict__try_get(Dict* self, py_TValue* key, DictEntry** out) {
+    py_i64 unused_hash;
+    uint32_t unused_slot;
+    bool ok = Dict__probe(self, key, &unused_hash, &unused_slot, out);
+    return ok;
+}
+
 static void Dict__clear(Dict* self) {
+    // Removes every entry while keeping the current index-table capacity.
-    memset(self->indices, -1, self->capacity * sizeof(DictIndex));
+    size_t indices_size = self->index_is_short ? self->capacity * sizeof(uint16_t)
+                                               : self->capacity * sizeof(uint32_t);
+    // all-ones bytes reset every slot to `null_index_value`
+    memset(self->indices, -1, indices_size);
     c11_vector__clear(&self->entries);
     self->length = 0;
 }
 
 static void Dict__rehash_2x(Dict* self) {
+    // Rebuilds the dict with the next capacity and re-inserts every live entry.
+    // `old_dict` keeps the previous storage alive until all entries have been moved.
     Dict old_dict = *self;
-    uint32_t new_capacity = self->capacity;
-
-__RETRY:
-    // use next capacity
-    new_capacity = Dict__next_cap(new_capacity);
+    // derive the new capacity from the current one (not from the uninitialized local)
+    uint32_t new_capacity = Dict__next_cap(old_dict.capacity);
     // create a new dict with new capacity
     Dict__ctor(self, new_capacity, old_dict.entries.capacity);
     // move entries from old dict to new dict
+    const uint32_t max_idx = new_capacity - 1;
     for(int i = 0; i < old_dict.entries.length; i++) {
         DictEntry* old_entry = c11__at(DictEntry, &old_dict.entries, i);
         if(py_isnil(&old_entry->key)) continue;
-        int idx = old_entry->hash % new_capacity;
-        bool success = false;
-        for(int i = 0; i < PK_DICT_MAX_COLLISION; i++) {
-            int idx2 = self->indices[idx]._[i];
-            if(idx2 == -1) {
-                // insert new entry (empty slot)
+        // keys are already unique, so probing only needs to find the first empty slot
+        uint32_t idx = old_entry->hash % new_capacity;
+        while(true) {
+            uint32_t idx2 = Dict__get_index(self, idx);
+            if(idx2 == self->null_index_value) {
                 c11_vector__push(DictEntry, &self->entries, *old_entry);
-                self->indices[idx]._[i] = self->entries.length - 1;
+                Dict__set_index(self, idx, self->entries.length - 1);
                 self->length++;
-                success = true;
                 break;
             }
-        }
-        if(!success) {
-            Dict__dtor(self);
-            goto __RETRY;
+            // try next index
+            idx = idx < max_idx ? idx + 1 : 0;
         }
     }
-    // done
     Dict__dtor(&old_dict);
 }
 
@@ -153,93 +206,69 @@ static void Dict__compact_entries(Dict* self) {
     }
     self->entries.length = n;
     // update indices
-    for(uint32_t i = 0; i < self->capacity; i++) {
-        for(int j = 0; j < PK_DICT_MAX_COLLISION; j++) {
-            int idx = self->indices[i]._[j];
-            if(idx == -1) continue;
-            self->indices[i]._[j] = mappings[idx];
-        }
+    for(int idx = 0; idx < self->capacity; idx++) {
+        uint32_t idx2 = Dict__get_index(self, idx);
+        if(idx2 == self->null_index_value) continue;
+        Dict__set_index(self, idx, mappings[idx2]);
     }
     PK_FREE(mappings);
 }
 
 static bool Dict__set(Dict* self, py_TValue* key, py_TValue* val) {
+    // Inserts `key` -> `val`, or overwrites the value when `key` is already present.
+    // Returns false only when `Dict__probe` fails; true otherwise.
     py_i64 hash;
-    if(!py_hash(key, &hash)) return false;
-    int idx = (uint64_t)hash % self->capacity;
-    int bad_hash_count = 0;
-    for(int i = 0; i < PK_DICT_MAX_COLLISION; i++) {
-        int idx2 = self->indices[idx]._[i];
-        if(idx2 == -1) {
-            // insert new entry
-            DictEntry* new_entry = c11_vector__emplace(&self->entries);
-            new_entry->hash = (uint64_t)hash;
-            new_entry->key = *key;
-            new_entry->val = *val;
-            self->indices[idx]._[i] = self->entries.length - 1;
-            self->length++;
-            return true;
-        }
+    uint32_t idx;
+    DictEntry* entry;
+    if(!Dict__probe(self, key, &hash, &idx, &entry)) return false;
+    if(entry) {
         // update existing entry
-        DictEntry* entry = c11__at(DictEntry, &self->entries, idx2);
-        // check if they have the same hash
-        if(entry->hash == (uint64_t)hash) {
-            // check if they are equal
-            int res = py_equal(&entry->key, key);
-            if(res == 1) {
-                entry->val = *val;
-                return true;
-            }
-            if(res == -1) return false;  // error
-            // res == 0
-            bad_hash_count++;
-        }
-    }
-    // no empty slot found
-    if(bad_hash_count == PK_DICT_MAX_COLLISION) {
-        // all `PK_DICT_MAX_COLLISION` slots have the same hash but different keys
-        // we are unable to solve this collision via rehashing
-        return RuntimeError("dict: %d/%d/%d: maximum collision reached (hash=%i)",
-                            self->entries.length,
-                            self->entries.capacity,
-                            self->capacity,
-                            hash);
-    }
-
-    if(self->capacity >= (uint32_t)self->entries.length * 10) {
-        return RuntimeError("dict: %d/%d/%d: minimum load factor reached",
-                            self->entries.length,
-                            self->entries.capacity,
-                            self->capacity);
+        entry->val = *val;
+        return true;
     }
-    Dict__rehash_2x(self);
-    return Dict__set(self, key, val);
+    // insert new entry
+    // `idx` is the empty slot where the probe stopped; the new entry is appended to
+    // `entries` and the slot records its position.
+    // NOTE(review): `entries.length` also counts popped entries that have not been
+    // compacted yet, so with 16-bit slots the stored position can presumably exceed
+    // the uint16_t range before compaction runs -- confirm.
+    DictEntry* new_entry = c11_vector__emplace(&self->entries);
+    new_entry->hash = (uint64_t)hash;
+    new_entry->key = *key;
+    new_entry->val = *val;
+    Dict__set_index(self, idx, self->entries.length - 1);
+    self->length++;
+    // check if we need to rehash
+    // growing once more than 4/7 of the slots are used keeps empty slots available
+    // for the probe loops to stop at
+    float load_factor = (float)self->length / self->capacity;
+    if(load_factor > 4 / 7.0f) Dict__rehash_2x(self);
+    return true;
 }
 
 /// Delete an entry from the dict.
 /// -1: error, 0: not found, 1: found and deleted
 static int Dict__pop(Dict* self, py_Ref key) {
     py_i64 hash;
-    if(!py_hash(key, &hash)) return -1;
-    int idx = (uint64_t)hash % self->capacity;
-    for(int i = 0; i < PK_DICT_MAX_COLLISION; i++) {
-        int idx2 = self->indices[idx]._[i];
-        if(idx2 == -1) continue;
-        DictEntry* entry = c11__at(DictEntry, &self->entries, idx2);
-        if(entry->hash == (uint64_t)hash) {
-            int res = py_equal(&entry->key, key);
-            if(res == 1) {
-                *py_retval() = entry->val;
-                py_newnil(&entry->key);
-                self->indices[idx]._[i] = -1;
-                self->length--;
-                if(self->length < self->entries.length / 2) Dict__compact_entries(self);
-                return 1;
-            }
-            if(res == -1) return -1;  // error
-        }
+    uint32_t idx;
+    DictEntry* entry;
+    if(!Dict__probe(self, key, &hash, &idx, &entry)) return -1;
+    if(!entry) return 0;  // not found
+
+    // found the entry, delete and return it
+    // the popped value is handed to the caller through py_retval()
+    py_assign(py_retval(), &entry->val);
+    Dict__set_index(self, idx, self->null_index_value);
+    py_newnil(&entry->key);
+    py_newnil(&entry->val);
+    self->length--;
+    // tidy indices (backward-shift deletion)
+    // Emptying slot `idx` may cut the probe path of ANY entry stored after it in the
+    // same run of occupied slots, not only entries with the same hash. Walk the run
+    // and pull back every entry whose home slot lies at or before the hole.
+    uint32_t hole = idx;
+    const uint32_t max_idx = self->capacity - 1;
+    uint32_t z = idx < max_idx ? idx + 1 : 0;
+    while(true) {
+        uint32_t idx2 = Dict__get_index(self, z);
+        if(idx2 == self->null_index_value) break;
+        uint64_t h = c11__at(DictEntry, &self->entries, idx2)->hash;
+        uint32_t home = (uint32_t)(h % self->capacity);
+        // the entry at `z` stays reachable only if its home slot lies cyclically in (hole, z]
+        bool reachable = hole < z ? (hole < home && home <= z) : (hole < home || home <= z);
+        if(!reachable) {
+            // `hole` holds the null value, so the swap moves the entry back and empties `z`
+            Dict__swap_index(self, hole, z);
+            hole = z;
+        }
+        z = z < max_idx ? z + 1 : 0;
     }
-    return 0;
+    // compact entries if necessary
+    if(self->entries.length > 16 && self->length < self->entries.length / 2)
+        Dict__compact_entries(self);
+    return 1;
 }
 
 static void DictIterator__ctor(DictIterator* self, Dict* dict, int mode) {
@@ -262,13 +291,13 @@ static bool dict__new__(int argc, py_Ref argv) {
     py_Type cls = py_totype(argv);
     int slots = cls == tp_dict ? 0 : -1;
     Dict* ud = py_newobject(py_retval(), cls, slots, sizeof(Dict));
-    Dict__ctor(ud, 7, 8);
+    Dict__ctor(ud, 7, 4);
     return true;
 }
 
 void py_newdict(py_OutRef out) {
+    // Creates a `tp_dict` object in `out` and initializes its `Dict` payload as empty.
     Dict* ud = py_newobject(out, tp_dict, 0, sizeof(Dict));
-    Dict__ctor(ud, 7, 8);
+    // start with 7 index slots and storage reserved for 4 entries
+    Dict__ctor(ud, 7, 4);
 }
 
 static bool dict__init__(int argc, py_Ref argv) {