Browse Source

Merge pull request #271 from szdytom/make-dict-c11

Make `Dict` and `DictIter` c11
BLUELOVETH 1 year ago
parent
commit
444434efb6

+ 20 - 0
benchmarks/dict_0.py

@@ -0,0 +1,20 @@
+# test basic get/set
+import random
+random.seed(7)
+
+a = {str(i): i for i in range(100)}
+a['existed'] = 0
+a['missed'] = 0
+
+for i in range(1000000):
+    key = str(random.randint(-100, 100))
+    if key in a:
+        a['existed'] += 1
+    else:
+        a['missed'] += 1
+
+existed = a['existed']
+missed = a['missed']
+
+assert abs(existed - missed) < 10000
+

+ 27 - 0
benchmarks/dict_1.py

@@ -0,0 +1,27 @@
+# test deletion
+rnd = 0
+keys = []
+while True:
+    keys.append(rnd)
+    rnd = ((rnd * 5) + 1) & 1023
+    if rnd == 0:
+        break
+
+assert len(keys) == 1024
+
+a = {k: k for k in keys}
+
+for i in range(10000):
+    if i % 2 == 0:
+        # del all keys
+        for k in keys:
+            del a[k]
+        assert len(a) == 0
+    else:
+        # add keys back
+        for k in keys:
+            a[k] = k
+        assert len(a) == len(keys)
+
+assert len(a) == len(keys)
+assert list(a.keys()) == keys   # order matters

+ 2 - 0
build_g.sh

@@ -1,3 +1,5 @@
+set -e
+
 python prebuild.py
 
 SRC_C=$(find src/ -name "*.c")

+ 2 - 2
include/pocketpy/interpreter/iter.hpp

@@ -74,9 +74,9 @@ struct Generator {
 
 struct DictItemsIter {
     PyVar ref;
-    int i;
+    pkpy_DictIter it;
 
-    DictItemsIter(PyVar ref) : ref(ref) { i = PK_OBJ_GET(Dict, ref)._head_idx; }
+    DictItemsIter(PyVar ref) : ref(ref) { it = PK_OBJ_GET(Dict, ref).iter(); }
 
     void _gc_mark(VM* vm) const { vm->obj_gc_mark(ref); }
 

+ 113 - 0
include/pocketpy/objects/dict.h

@@ -0,0 +1,113 @@
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdbool.h>
+#include "pocketpy/objects/pyvar.h"
+#include "pocketpy/common/vector.h"
+
+/** @brief `pkpy_Dict` is the Dict type in Python */
+typedef struct {
+    int count;             /** number of elements in the dictionary */
+    c11_vector _entries;   /** contains `pkpy_DictEntry` (hidden type) */
+    int _htcap;            /** capacity of the hashtable, always a power of 2 */
+    void* _hashtable;      /** contains indices, can be `u8`, `u16` or `u32` according to size */
+} pkpy_Dict;
+
+/** @brief `pkpy_DictIter` is used to iterate over a `pkpy_Dict` */
+typedef struct {
+    const pkpy_Dict* _dict;
+    int _index;
+} pkpy_DictIter;
+
+/**
+ * @brief `pkpy_Dict` constructor
+ * @param self `pkpy_Dict` instance
+ */
+void pkpy_Dict__ctor(pkpy_Dict* self);
+
+/**
+ * @brief `pkpy_Dict` destructor
+ * @param self `pkpy_Dict` instance
+ */
+void pkpy_Dict__dtor(pkpy_Dict* self);
+
+/**
+ * @brief Copy a `pkpy_Dict`
+ * @param self `pkpy_Dict` instance
+ * @return a new `pkpy_Dict` instance, must be destructed by the caller
+ */
+pkpy_Dict pkpy_Dict__copy(const pkpy_Dict* self);
+
+/**
+ * @brief Set a key-value pair into the `pkpy_Dict`
+ * @param self `pkpy_Dict` instance
+ * @param vm __eq__ and __hash__ context
+ * @param key key to set
+ * @param val value to set
+ * @return `true` if the key is newly added, `false` if the key already exists
+ */
+bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val);
+
+/**
+ * @brief Check if a key exists in the `pkpy_Dict`
+ * @param self `pkpy_Dict` instance
+ * @param vm __eq__ and __hash__ context
+ * @param key key to check
+ * @return `true` if the key exists, `false` otherwise
+ */
+bool pkpy_Dict__contains(const pkpy_Dict* self, void* vm, pkpy_Var key);
+
+/**
+ * @brief Remove a key from the `pkpy_Dict`
+ * @param self `pkpy_Dict` instance
+ * @param vm __eq__ and __hash__ context
+ * @param key key to remove
+ * @return `true` if the key was found and removed, `false` if the key doesn't exist
+ */
+bool pkpy_Dict__del(pkpy_Dict* self, void* vm, pkpy_Var key);
+
+/**
+ * @brief Try to get a value from the `pkpy_Dict`
+ * @param self `pkpy_Dict` instance
+ * @param vm __eq__ and __hash__ context
+ * @param key key to get
+ * @return a pointer to the value associated with the key, `NULL` if the key doesn't exist
+ */
+const pkpy_Var* pkpy_Dict__try_get(const pkpy_Dict* self, void* vm, pkpy_Var key);
+
+/**
+ * @brief Update the `pkpy_Dict` with another one
+ * @param self `pkpy_Dict` instance
+ * @param vm __eq__ and __hash__ context
+ * @param other `pkpy_Dict` instance to update with
+ */
+void pkpy_Dict__update(pkpy_Dict* self, void *vm, const pkpy_Dict* other);
+
+/**
+ * @brief Clear the `pkpy_Dict`
+ * @param self `pkpy_Dict` instance
+ */
+void pkpy_Dict__clear(pkpy_Dict* self);
+
+/**
+ * @brief Iterate over the `pkpy_Dict`
+ * @param self `pkpy_Dict` instance
+ * @return an iterator over the `pkpy_Dict`
+ */
+pkpy_DictIter pkpy_Dict__iter(const pkpy_Dict* self);
+
+/**
+ * @brief Fetch the current key-value pair and advance the iterator
+ * @param self `pkpy_DictIter` instance
+ * @param key key will be filled with the current key, can be `NULL` if not needed
+ * @param value value will be filled with the current value, can be `NULL` if not needed
+ * @return `true` if a pair was produced, `false` if there is nothing left to iterate
+ */
+bool pkpy_DictIter__next(pkpy_DictIter* self, pkpy_Var* key, pkpy_Var* value);
+
+#ifdef __cplusplus
+}
+#endif

+ 72 - 40
include/pocketpy/objects/dict.hpp

@@ -2,63 +2,95 @@
 
 #include "pocketpy/objects/base.hpp"
 #include "pocketpy/objects/tuplelist.hpp"
+#include "pocketpy/objects/dict.h"
 
 namespace pkpy {
 
-struct Dict {
-    struct Item {
-        PyVar first;
-        PyVar second;
-        int prev;
-        int next;
-    };
-
-    constexpr static int __Capacity = 8;
-    constexpr static float __LoadFactor = 0.67f;
-
-    int _capacity;
-    int _mask;
-    int _size;
-    int _critical_size;
-    int _head_idx;  // for order preserving
-    int _tail_idx;  // for order preserving
-    Item* _items;
-
-    Dict();
-    Dict(Dict&& other);
-    Dict(const Dict& other);
+struct Dict : private pkpy_Dict {
+    Dict() {
+        pkpy_Dict__ctor(this);
+    }
+
+    Dict(Dict&& other) {
+        std::memcpy(this, &other, sizeof(Dict));
+        pkpy_Dict__ctor(&other);
+    }
+
+    Dict(const Dict& other) {
+        // OPTIMIZEME: reduce copy
+        auto clone = pkpy_Dict__copy(&other);
+        std::memcpy(this, &clone, sizeof(Dict));
+    }
+    
     Dict& operator= (const Dict&) = delete;
     Dict& operator= (Dict&&) = delete;
 
-    int size() const { return _size; }
+    int size() const { return count; }
 
-    void _probe_0(VM* vm, PyVar key, bool& ok, int& i) const;
-    void _probe_1(VM* vm, PyVar key, bool& ok, int& i) const;
+    void set(VM* vm, PyVar key, PyVar val) {
+        pkpy_Dict__set(this, vm, *(pkpy_Var*)(&key), *(pkpy_Var*)(&val));
+    }
 
-    void set(VM* vm, PyVar key, PyVar val);
-    void _rehash(VM* vm);
+    PyVar try_get(VM* vm, PyVar key) const {
+        auto res = pkpy_Dict__try_get(this, vm, *(pkpy_Var*)(&key));
+        if (!res) return nullptr;
+        return *(const PyVar*)(res);
+    }
 
-    PyVar try_get(VM* vm, PyVar key) const;
+    bool contains(VM* vm, PyVar key) const {
+        return pkpy_Dict__contains(this, vm, *(pkpy_Var*)(&key));
+    }
 
-    bool contains(VM* vm, PyVar key) const;
-    bool del(VM* vm, PyVar key);
-    void update(VM* vm, const Dict& other);
+    bool del(VM* vm, PyVar key) {
+        return pkpy_Dict__del(this, vm, *(pkpy_Var*)(&key));
+    }
+
+    void update(VM* vm, const Dict& other) {
+        pkpy_Dict__update(this, vm, &other);
+    }
 
     template <typename __Func>
     void apply(__Func f) const {
-        int i = _head_idx;
-        while(i != -1) {
-            f(_items[i].first, _items[i].second);
-            i = _items[i].next;
+        pkpy_DictIter it = iter();
+        PyVar key, val;
+        while(pkpy_DictIter__next(&it, (pkpy_Var*)(&key), (pkpy_Var*)(&val))) {
+            f(key, val);
         }
     }
 
-    Tuple keys() const;
-    Tuple values() const;
-    void clear();
-    ~Dict();
+    Tuple keys() const {
+        Tuple res(count);
+        pkpy_DictIter it = iter();
+        PyVar key, val;
+        int i = 0;
+        while(pkpy_DictIter__next(&it, (pkpy_Var*)(&key), (pkpy_Var*)(&val))) {
+            res[i++] = key;
+        }
+        return res;
+    }
+
+    Tuple values() const {
+        Tuple res(count);
+        pkpy_DictIter it = iter();
+        PyVar key, val;
+        int i = 0;
+        while(pkpy_DictIter__next(&it, (pkpy_Var*)(&key), (pkpy_Var*)(&val))) {
+            res[i++] = val;
+        }
+        return res;
+    }
 
-    void __alloc_items();
+    pkpy_DictIter iter() const {
+        return pkpy_Dict__iter(this);
+    }
+
+    void clear() {
+        pkpy_Dict__clear(this);
+    }
+
+    ~Dict() {
+        pkpy_Dict__dtor(this);
+    }
 
     void _gc_mark(VM*) const;
 };

+ 51 - 0
include/pocketpy/objects/pyvar.h

@@ -0,0 +1,51 @@
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdbool.h>
+#include <stdint.h>
+
+/**
+ * @brief A python value in pocketpy.
+ */
+typedef struct {
+    int type;
+    int _0;
+    int64_t _1;
+} pkpy_Var;
+
+/**
+ * @brief Check if the pkpy_Var is null.
+ * @param self The variable to check.
+ * @return True if the variable is null, false otherwise.
+ */
+#define pkpy_Var__is_null(self) ((self)->type == 0)
+
+/**
+ * @brief Set the variable to null.
+ * @param self The variable to set.
+ */
+#define pkpy_Var__set_null(self) do { (self)->type = 0; } while(0)
+
+/**
+ * @brief Check if two pkpy_Vars are equal, respects to __eq__ method.
+ * @param vm The virtual machine.
+ * @param a The first pkpy_Var.
+ * @param b The second pkpy_Var.
+ * @return True if the pkpy_Vars are equal, false otherwise.
+ */
+bool pkpy_Var__eq__(void *vm, pkpy_Var a, pkpy_Var b);
+
+/**
+ * @brief Get the hash of the pkpy_Var, respects to __hash__ method.
+ * @param vm The virtual machine.
+ * @param a The pkpy_Var to hash.
+ * @return The hash of the pkpy_Var.
+ */
+int64_t pkpy_Var__hash__(void *vm, pkpy_Var a);
+
+#ifdef __cplusplus
+}
+#endif

+ 7 - 6
src/interpreter/iter.cpp

@@ -117,12 +117,13 @@ void DictItemsIter::_register(VM* vm, PyObject* mod, PyObject* type) {
     });
     vm->bind__next__(type->as<Type>(), [](VM* vm, PyVar _0) -> unsigned {
         DictItemsIter& self = _CAST(DictItemsIter&, _0);
-        Dict& d = PK_OBJ_GET(Dict, self.ref);
-        if(self.i == -1) return 0;
-        vm->s_data.push(d._items[self.i].first);
-        vm->s_data.push(d._items[self.i].second);
-        self.i = d._items[self.i].next;
-        return 2;
+        PyVar key, val;
+        if (pkpy_DictIter__next(&self.it, (pkpy_Var*)(&key), (pkpy_Var*)(&val))) {
+            vm->s_data.push(key);
+            vm->s_data.push(val);
+            return 2;
+        }
+        return 0;
     });
 }
 

+ 0 - 31
src/interpreter/vm.cpp

@@ -1628,37 +1628,6 @@ BIND_BINARY_SPECIAL(__xor__)
 
 #undef BIND_BINARY_SPECIAL
 
-void Dict::_probe_0(VM* vm, PyVar key, bool& ok, int& i) const {
-    ok = false;
-    i64 hash = vm->py_hash(key);
-    i = hash & _mask;
-    for(int j = 0; j < _capacity; j++) {
-        if(_items[i].first != nullptr) {
-            if(vm->py_eq(_items[i].first, key)) {
-                ok = true;
-                break;
-            }
-        } else {
-            if(_items[i].second == nullptr) break;
-        }
-        // https://github.com/python/cpython/blob/3.8/Objects/dictobject.c#L166
-        i = ((5 * i) + 1) & _mask;
-    }
-}
-
-void Dict::_probe_1(VM* vm, PyVar key, bool& ok, int& i) const {
-    ok = false;
-    i = vm->py_hash(key) & _mask;
-    while(_items[i].first != nullptr) {
-        if(vm->py_eq(_items[i].first, key)) {
-            ok = true;
-            break;
-        }
-        // https://github.com/python/cpython/blob/3.8/Objects/dictobject.c#L166
-        i = ((5 * i) + 1) & _mask;
-    }
-}
-
 #if PK_ENABLE_PROFILER
 void NextBreakpoint::_step(VM* vm) {
     int curr_callstack_size = vm->callstack.size();

+ 260 - 0
src/objects/dict.c

@@ -0,0 +1,260 @@
+#include "pocketpy/objects/dict.h"
+#include "pocketpy/common/utils.h"
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+
+#define DICT_MAX_LOAD 0.75 // the hashtable is doubled once count >= _htcap * DICT_MAX_LOAD
+#define DICT_HASH_NEXT(h) ((h) * 5 + 1) // next slot of the probe sequence; callers mask the result with (_htcap - 1)
+#define DICT_HASH_TRANS(h) ((int)((h) & 0xffffffff)) // used to transform the value returned by __hash__
+#define PK_DICT_COMPACT_MODE 1 // nonzero: small hashtables use 1- or 2-byte slots instead of 4-byte ones
+
+struct pkpy_DictEntry { // element type of `_entries`
+    pkpy_Var key; // a null key marks an entry that has been deleted
+    pkpy_Var val;
+};
+
+inline extern int pkpy_Dict__idx_size(const pkpy_Dict* self) { // width in bytes of one hashtable slot
+#if PK_DICT_COMPACT_MODE
+    if(self->_htcap < 255) return 1; // small table: one byte per slot (null marker is 255, see pkpy_Dict__idx_null)
+    if(self->_htcap < 65535) return 2; // medium table: two bytes per slot (null marker is 65535)
+#endif
+    return 4;
+}
+
+inline extern int pkpy_Dict__idx_null(const pkpy_Dict* self) { // value stored in a slot that refers to no entry
+#if PK_DICT_COMPACT_MODE
+    if(self->_htcap < 255) return 255; // all-ones for a 1-byte slot; thresholds mirror pkpy_Dict__idx_size
+    if(self->_htcap < 65535) return 65535; // all-ones for a 2-byte slot
+#endif
+    return -1; // all-ones for a 4-byte slot
+}
+
+inline extern int pkpy_Dict__ht_byte_size(const pkpy_Dict* self) { return self->_htcap * pkpy_Dict__idx_size(self); } // size of the whole hashtable in bytes
+
+void pkpy_Dict__ctor(pkpy_Dict* self) {
+    self->count = 0;
+    c11_vector__ctor(&self->_entries, sizeof(struct pkpy_DictEntry));
+    self->_htcap = 16; // initial capacity; must stay a power of 2 because probing masks with (_htcap - 1)
+    self->_hashtable = malloc(pkpy_Dict__ht_byte_size(self)); // NOTE(review): the result of malloc is not checked
+    memset(self->_hashtable, 0xff, pkpy_Dict__ht_byte_size(self)); // all-ones bytes: every slot reads as the null index
+}
+
+void pkpy_Dict__dtor(pkpy_Dict* self) { // releases what the dict owns; `self` itself is not freed
+    c11_vector__dtor(&self->_entries);
+    free(self->_hashtable);
+}
+
+pkpy_Dict pkpy_Dict__copy(const pkpy_Dict* self) {
+    int ht_size = pkpy_Dict__ht_byte_size(self);
+    void* ht_clone = malloc(ht_size); // NOTE(review): the result of malloc is not checked
+    memcpy(ht_clone, self->_hashtable, ht_size); // slots hold entry indices, not pointers, so a byte copy is enough
+    return (pkpy_Dict){.count = self->count,
+                       ._entries = c11_vector__copy(&self->_entries), // presumably an independent copy of the entries; confirm in c11_vector
+                       ._htcap = self->_htcap,
+                       ._hashtable = ht_clone};
+}
+
+/**
+ * @brief Read the entry index stored in hashtable slot `h`.
+ *
+ * A slot is exactly `pkpy_Dict__idx_size(self)` bytes wide, so it is accessed
+ * through an integer type of that width. Loading a 4-byte `int` from a narrow
+ * slot would be a misaligned access, would read past the end of the allocation
+ * for the last slots of the table, and would only work on little-endian hosts.
+ *
+ * @param self `pkpy_Dict` instance
+ * @param h slot number, expected to be in `[0, _htcap)`
+ * @return the stored index; equals `pkpy_Dict__idx_null(self)` for an empty slot
+ */
+static int pkpy_Dict__htget(const pkpy_Dict* self, int h) {
+    switch(pkpy_Dict__idx_size(self)) {
+        case 1: return ((const uint8_t*)self->_hashtable)[h];   // 0..255, 255 is the null marker
+        case 2: return ((const uint16_t*)self->_hashtable)[h];  // 0..65535, 65535 is the null marker
+        default: return ((const int*)self->_hashtable)[h];      // 4-byte slots, -1 is the null marker
+    }
+}
+
+/**
+ * @brief Store entry index `v` into hashtable slot `h`.
+ *
+ * Writes exactly `pkpy_Dict__idx_size(self)` bytes. A 4-byte read-modify-write
+ * on a narrow slot would touch the neighbouring slots and, for the last slots
+ * of the table, memory beyond the allocation.
+ *
+ * @param self `pkpy_Dict` instance
+ * @param h slot number, expected to be in `[0, _htcap)`
+ * @param v index to store; must be representable in the current slot width
+ */
+static void pkpy_Dict__htset(pkpy_Dict* self, int h, int v) {
+    switch(pkpy_Dict__idx_size(self)) {
+        case 1: ((uint8_t*)self->_hashtable)[h] = (uint8_t)v; break;
+        case 2: ((uint16_t*)self->_hashtable)[h] = (uint16_t)v; break;
+        default: ((int*)self->_hashtable)[h] = v; break;
+    }
+}
+
+static int pkpy_Dict__probe0(const pkpy_Dict* self, void* vm, pkpy_Var key, int hash) { // find a slot that can receive a new entry (`vm` and `key` are not used)
+    const int null = pkpy_Dict__idx_null(self);
+    const int mask = self->_htcap - 1; // valid as a modulo because _htcap is a power of 2
+    for(int h = hash & mask;; h = DICT_HASH_NEXT(h) & mask) {
+        int idx = pkpy_Dict__htget(self, h);
+        if(idx == null) return h; // empty slot
+
+        struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx);
+        if(pkpy_Var__is_null(&entry->key)) return h; // slot refers to a deleted entry, so it can be reused
+    }
+    PK_UNREACHABLE();
+}
+
+static int pkpy_Dict__probe1(const pkpy_Dict* self, void* vm, pkpy_Var key, int hash) { // find the slot holding `key`, or the first empty slot if `key` is absent
+    const int null = pkpy_Dict__idx_null(self);
+    const int mask = self->_htcap - 1; // valid as a modulo because _htcap is a power of 2
+    for(int h = hash & mask;; h = DICT_HASH_NEXT(h) & mask) {
+        int idx = pkpy_Dict__htget(self, h);
+        if(idx == null) return h; // end of the probe chain: the key is not stored
+
+        struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx);
+        if(pkpy_Var__is_null(&entry->key)) continue; // deleted entry: the chain may continue behind it
+        if(pkpy_Var__eq__(vm, entry->key, key)) return h; // a non-empty slot is only ever returned after an __eq__ match
+    }
+    PK_UNREACHABLE();
+}
+
+static void pkpy_Dict__extendht(pkpy_Dict* self, void* vm) { // double the hashtable and re-register every live entry
+    free(self->_hashtable); // old slots are not needed: the table is rebuilt from `_entries`
+    self->_htcap *= 2; // may also change the slot width, see pkpy_Dict__idx_size
+    self->_hashtable = malloc(pkpy_Dict__ht_byte_size(self)); // NOTE(review): the result of malloc is not checked
+    memset(self->_hashtable, 0xff, pkpy_Dict__ht_byte_size(self)); // mark every slot as null
+
+    for(int i = 0; i < self->_entries.count; i++) {
+        struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, i);
+        if(pkpy_Var__is_null(&entry->key)) continue; // deleted entries get no slot; they stay in `_entries` until compaction
+
+        int rhash = DICT_HASH_TRANS(pkpy_Var__hash__(vm, entry->key)); // hashes are not cached, so __hash__ is called again
+        int h = pkpy_Dict__probe0(self, vm, entry->key, rhash);
+        pkpy_Dict__htset(self, h, i);
+    }
+}
+
+/**
+ * @brief Insert `key` with `val`, or overwrite the value of an existing `key`.
+ *
+ * `pkpy_Dict__probe1` stops on a non-empty slot only after `__eq__` has matched
+ * the stored key, so a non-null index always identifies the entry to update.
+ * The key is therefore not compared a second time.
+ *
+ * @param self `pkpy_Dict` instance
+ * @param vm __eq__ and __hash__ context
+ * @param key key to set
+ * @param val value to set
+ * @return `true` if the key is newly added, `false` if the key already exists
+ */
+bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val) {
+    int hash = DICT_HASH_TRANS(pkpy_Var__hash__(vm, key));
+    int h = pkpy_Dict__probe1(self, vm, key, hash);
+
+    int idx = pkpy_Dict__htget(self, h);
+    if(idx != pkpy_Dict__idx_null(self)) {
+        // existing key: only the value changes, insertion order is kept
+        struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx);
+        entry->val = val;
+        return false;
+    }
+
+    // new key: append the entry, then register its index in the hashtable
+    idx = self->_entries.count;
+    c11_vector__push(struct pkpy_DictEntry,
+                     &self->_entries,
+                     ((struct pkpy_DictEntry){
+                         .key = key,
+                         .val = val,
+                     }));
+    // probe0 may pick an earlier slot than probe1 did, reusing one left behind by a deleted entry
+    h = pkpy_Dict__probe0(self, vm, key, hash);
+    pkpy_Dict__htset(self, h, idx);
+    self->count += 1;
+    if(self->count >= self->_htcap * DICT_MAX_LOAD) pkpy_Dict__extendht(self, vm);
+    return true;
+}
+
+/**
+ * @brief Check whether `key` is stored in the dict.
+ * @param self `pkpy_Dict` instance
+ * @param vm __eq__ and __hash__ context
+ * @param key key to look up
+ * @return `true` if the key exists, `false` otherwise
+ */
+bool pkpy_Dict__contains(const pkpy_Dict* self, void* vm, pkpy_Var key) {
+    int hash = DICT_HASH_TRANS(pkpy_Var__hash__(vm, key));
+    int h = pkpy_Dict__probe1(self, vm, key, hash);
+
+    // probe1 stops either on the slot holding `key` or on an empty slot
+    return pkpy_Dict__htget(self, h) != pkpy_Dict__idx_null(self);
+}
+
+static bool pkpy_Dict__refactor(pkpy_Dict* self, void* vm) { // compact `_entries` by dropping deleted entries, then rebuild the hashtable; returns whether it did so
+    int deleted_slots = self->_entries.count - self->count; // entries whose key was nulled by pkpy_Dict__del
+    if(deleted_slots <= 8 || deleted_slots < self->_entries.count * (1 - DICT_MAX_LOAD)) return false; // too few holes to be worth a rebuild
+
+    // shrink
+    // free(self->_hashtable);
+    // while(self->_htcap * DICT_MAX_LOAD / 2 > self->count && self->_htcap >= 32)
+    //     self->_htcap /= 2;
+    // self->_hashtable = malloc(pkpy_Dict__ht_byte_size(self));
+    memset(self->_hashtable, 0xff, pkpy_Dict__ht_byte_size(self)); // reset every slot to null; capacity is kept since the shrink above is disabled
+
+    int new_cnt = 0;
+    for (int i = 0; i < self->_entries.count; ++i) { // slide live entries to the front, preserving their order
+        struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, i);
+        if(pkpy_Var__is_null(&entry->key)) continue;
+        if (i > new_cnt) c11__setitem(struct pkpy_DictEntry, &self->_entries, new_cnt, *entry); // skip the copy while nothing has been removed yet
+        new_cnt += 1;
+    }
+
+    self->_entries.count = new_cnt; // truncate: everything past new_cnt is stale
+    for(int i = 0; i < self->_entries.count; i++) { // register every surviving entry under its new index
+        struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, i);
+        if(pkpy_Var__is_null(&entry->key)) continue; // defensive: the compaction above leaves no deleted entries
+
+        int rhash = DICT_HASH_TRANS(pkpy_Var__hash__(vm, entry->key)); // hashes are not cached, so __hash__ is called again
+        int h = pkpy_Dict__probe0(self, vm, entry->key, rhash);
+        pkpy_Dict__htset(self, h, i);
+    }
+    return true;
+}
+
+bool pkpy_Dict__del(pkpy_Dict* self, void* vm, pkpy_Var key) {
+    int hash = DICT_HASH_TRANS(pkpy_Var__hash__(vm, key));
+    int h = pkpy_Dict__probe1(self, vm, key, hash);
+    int idx = pkpy_Dict__htget(self, h), null = pkpy_Dict__idx_null(self);
+    if(idx == null) return false; // key is not present
+    
+    struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx);
+    pkpy_Var__set_null(&entry->key); // tombstone: the entry and its hashtable slot stay in place so probe chains remain intact
+    self->count -= 1;
+    pkpy_Dict__refactor(self, vm); // compacts only once enough deleted entries have piled up; its result is not needed here
+    return true;
+}
+
+/* Look up `key`; returns a pointer to the stored value, or NULL when the key is absent. */
+const pkpy_Var *pkpy_Dict__try_get(const pkpy_Dict* self, void* vm, pkpy_Var key) {
+    const int key_hash = DICT_HASH_TRANS(pkpy_Var__hash__(vm, key));
+    const int slot = pkpy_Dict__probe1(self, vm, key, key_hash);
+    const int entry_idx = pkpy_Dict__htget(self, slot);
+
+    // a non-null index means probe1 matched the key stored in that entry
+    if(entry_idx != pkpy_Dict__idx_null(self)) {
+        struct pkpy_DictEntry* found = &c11__getitem(struct pkpy_DictEntry, &self->_entries, entry_idx);
+        return &found->val;
+    }
+    return NULL;
+}
+
+/* Copy every live key-value pair of `other` into `self`, in `other`'s entry order. */
+void pkpy_Dict__update(pkpy_Dict *self, void *vm, const pkpy_Dict *other) {
+    int pos = 0;
+    while(pos < other->_entries.count) {
+        struct pkpy_DictEntry* src = &c11__getitem(struct pkpy_DictEntry, &other->_entries, pos);
+        pos += 1;
+        // entries with a null key were deleted from `other` and are skipped
+        if(!pkpy_Var__is_null(&src->key)) pkpy_Dict__set(self, vm, src->key, src->val);
+    }
+}
+
+/* Remove all items; the hashtable keeps its current capacity. */
+void pkpy_Dict__clear(pkpy_Dict *self) {
+    // all-ones bytes make every slot read as the null index
+    memset(self->_hashtable, 0xff, pkpy_Dict__ht_byte_size(self));
+    self->_entries.count = 0;
+    self->count = 0;
+}
+
+/* Return the first index >= `idx` whose entry is not deleted, or an index past the end if none is left. */
+static int pkpy_Dict__next_entry_idx(const pkpy_Dict* self, int idx) {
+    for(; idx < self->_entries.count; idx++) {
+        struct pkpy_DictEntry* candidate = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx);
+        if(!pkpy_Var__is_null(&candidate->key)) return idx;
+    }
+    return idx;
+}
+
+pkpy_DictIter pkpy_Dict__iter(const pkpy_Dict *self) {
+    return (pkpy_DictIter){
+        ._dict = self, // the iterator only stores the pointer; it does not copy the dict
+        ._index = pkpy_Dict__next_entry_idx(self, 0), // start on the first entry that is not deleted
+    };
+}
+
+bool pkpy_DictIter__next(pkpy_DictIter *self, pkpy_Var *key, pkpy_Var *val) {
+    if(self->_index >= self->_dict->_entries.count) return false; // past the last entry: nothing left
+    
+    struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_dict->_entries, self->_index);
+    if(pkpy_Var__is_null(&entry->key)) return false; // presumably the entry was deleted after the iterator was positioned on it
+    if (key) *key = entry->key; // both out-parameters are optional
+    if (val) *val = entry->val;
+
+    self->_index = pkpy_Dict__next_entry_idx(self->_dict, self->_index + 1); // position on the next live entry for the following call
+    return true;
+}

+ 0 - 180
src/objects/dict.cpp

@@ -1,180 +0,0 @@
-#include "pocketpy/objects/dict.hpp"
-
-namespace pkpy {
-
-Dict::Dict() :
-    _capacity(__Capacity), _mask(__Capacity - 1), _size(0), _critical_size(__Capacity * __LoadFactor + 0.5f),
-    _head_idx(-1), _tail_idx(-1) {
-    __alloc_items();
-}
-
-void Dict::__alloc_items() {
-    _items = (Item*)std::malloc(_capacity * sizeof(Item));
-    for(int i = 0; i < _capacity; i++) {
-        _items[i].first = nullptr;
-        _items[i].second = nullptr;
-        _items[i].prev = -1;
-        _items[i].next = -1;
-    }
-}
-
-Dict::Dict(Dict&& other) {
-    _capacity = other._capacity;
-    _mask = other._mask;
-    _size = other._size;
-    _critical_size = other._critical_size;
-    _head_idx = other._head_idx;
-    _tail_idx = other._tail_idx;
-    _items = other._items;
-    other._items = nullptr;
-}
-
-Dict::Dict(const Dict& other) {
-    _capacity = other._capacity;
-    _mask = other._mask;
-    _size = other._size;
-    _critical_size = other._critical_size;
-    _head_idx = other._head_idx;
-    _tail_idx = other._tail_idx;
-    // copy items
-    _items = (Item*)std::malloc(_capacity * sizeof(Item));
-    std::memcpy(_items, other._items, _capacity * sizeof(Item));
-}
-
-void Dict::set(VM* vm, PyVar key, PyVar val) {
-    // do possible rehash
-    if(_size + 1 > _critical_size) _rehash(vm);
-    bool ok;
-    int i;
-    _probe_1(vm, key, ok, i);
-    if(!ok) {
-        _size++;
-        _items[i].first = key;
-
-        // append to tail
-        if(_size == 0 + 1) {
-            _head_idx = i;
-            _tail_idx = i;
-        } else {
-            _items[i].prev = _tail_idx;
-            _items[_tail_idx].next = i;
-            _tail_idx = i;
-        }
-    }
-    _items[i].second = val;
-}
-
-void Dict::_rehash(VM* vm) {
-    Item* old_items = _items;
-    int old_head_idx = _head_idx;
-
-    _capacity *= 4;
-    _mask = _capacity - 1;
-    _size = 0;
-    _critical_size = _capacity * __LoadFactor + 0.5f;
-    _head_idx = -1;
-    _tail_idx = -1;
-
-    __alloc_items();
-
-    // copy old items to new dict
-    int i = old_head_idx;
-    while(i != -1) {
-        set(vm, old_items[i].first, old_items[i].second);
-        i = old_items[i].next;
-    }
-
-    std::free(old_items);
-}
-
-PyVar Dict::try_get(VM* vm, PyVar key) const {
-    bool ok;
-    int i;
-    _probe_0(vm, key, ok, i);
-    if(!ok) return nullptr;
-    return _items[i].second;
-}
-
-bool Dict::contains(VM* vm, PyVar key) const {
-    bool ok;
-    int i;
-    _probe_0(vm, key, ok, i);
-    return ok;
-}
-
-bool Dict::del(VM* vm, PyVar key) {
-    bool ok;
-    int i;
-    _probe_0(vm, key, ok, i);
-    if(!ok) return false;
-    _items[i].first = nullptr;
-    // _items[i].second = PY_DELETED_SLOT;  // do not change .second if it is not NULL, it means the slot is occupied by
-    // a deleted item
-    _size--;
-
-    if(_size == 0) {
-        _head_idx = -1;
-        _tail_idx = -1;
-    } else {
-        if(_head_idx == i) {
-            _head_idx = _items[i].next;
-            _items[_head_idx].prev = -1;
-        } else if(_tail_idx == i) {
-            _tail_idx = _items[i].prev;
-            _items[_tail_idx].next = -1;
-        } else {
-            _items[_items[i].prev].next = _items[i].next;
-            _items[_items[i].next].prev = _items[i].prev;
-        }
-    }
-    _items[i].prev = -1;
-    _items[i].next = -1;
-    return true;
-}
-
-void Dict::update(VM* vm, const Dict& other) {
-    other.apply([&](PyVar k, PyVar v) {
-        set(vm, k, v);
-    });
-}
-
-Tuple Dict::keys() const {
-    Tuple t(_size);
-    int i = _head_idx;
-    int j = 0;
-    while(i != -1) {
-        t[j++] = _items[i].first;
-        i = _items[i].next;
-    }
-    assert(j == _size);
-    return t;
-}
-
-Tuple Dict::values() const {
-    Tuple t(_size);
-    int i = _head_idx;
-    int j = 0;
-    while(i != -1) {
-        t[j++] = _items[i].second;
-        i = _items[i].next;
-    }
-    assert(j == _size);
-    return t;
-}
-
-void Dict::clear() {
-    _size = 0;
-    _head_idx = -1;
-    _tail_idx = -1;
-    for(int i = 0; i < _capacity; i++) {
-        _items[i].first = nullptr;
-        _items[i].second = nullptr;
-        _items[i].prev = -1;
-        _items[i].next = -1;
-    }
-}
-
-Dict::~Dict() {
-    if(_items) std::free(_items);
-}
-}  // namespace pkpy

+ 17 - 0
src/objects/pyvar.cpp

@@ -0,0 +1,17 @@
+#include "pocketpy/objects/base.hpp"
+#include "pocketpy/objects/pyvar.h"
+#include "pocketpy/interpreter/vm.hpp"
+
+extern "C" {
+
+bool pkpy_Var__eq__(void* vm_, pkpy_Var a, pkpy_Var b) {
+    auto vm = (pkpy::VM*)(vm_);
+    return vm->py_eq(*(pkpy::PyVar*)(&a), *(pkpy::PyVar*)(&b));
+}
+
+int64_t pkpy_Var__hash__(void* vm_, pkpy_Var a) {
+    auto vm = (pkpy::VM*)(vm_);
+    return vm->py_hash(*(pkpy::PyVar*)(&a));
+}
+
+}

+ 6 - 6
src/pocketpy.cpp

@@ -1493,12 +1493,12 @@ void __init_builtins(VM* _vm) {
         if(!vm->isinstance(_1, vm->tp_dict)) return vm->NotImplemented;
         Dict& other = _CAST(Dict&, _1);
         if(self.size() != other.size()) return vm->False;
-        for(int i = 0; i < self._capacity; i++) {
-            auto item = self._items[i];
-            if(item.first == nullptr) continue;
-            PyVar value = other.try_get(vm, item.first);
-            if(value == nullptr) return vm->False;
-            if(!vm->py_eq(item.second, value)) return vm->False;
+        pkpy_DictIter it = self.iter();
+        PyVar key, val;
+        while(pkpy_DictIter__next(&it, (pkpy_Var*)(&key), (pkpy_Var*)(&val))) {
+            PyVar other_val = other.try_get(vm, key);
+            if(other_val == nullptr) return vm->False;
+            if(!vm->py_eq(val, other_val)) return vm->False;
         }
         return vm->True;
     });

+ 11 - 1
tests/07_dict.py

@@ -159,6 +159,17 @@ try:
 except TypeError:
     pass
 
+n = 2 ** 17
+a = {}
+for i in range(n):
+    a[str(i)] = i
+
+for i in range(n):
+    y = a[str(i)]
+
+for i in range(n):
+    del a[str(i)]
+
 a = {1: 2, 3: 4}
 a['a'] = a
 assert repr(a) == "{1: 2, 3: 4, 'a': {...}}"
@@ -169,4 +180,3 @@ gc.collect()
 for k, v in a.items():
     pass
 assert gc.collect() == 1
-