blueloveTH 10 месяцев назад
Родитель
Commit
0d9bf2e7de

+ 2 - 0
include/pocketpy/common/str.h

@@ -13,6 +13,8 @@ typedef struct c11_string {
     char data[];  // flexible array member
 } c11_string;
 
+c11_string* pk_tostr(py_Ref self);
+
 /* bytes */
 typedef struct c11_bytes {
     int size;

+ 0 - 2
include/pocketpy/interpreter/vm.h

@@ -34,8 +34,6 @@ typedef struct VM {
 
     py_Callbacks callbacks;
 
-    py_TValue ascii_literals[128 + 1];
-
     py_TValue last_retval;
     py_TValue curr_exception;
 

+ 1 - 0
include/pocketpy/objects/base.h

@@ -21,5 +21,6 @@ typedef struct py_TValue {
         c11_vec2i _vec2i;
         c11_color32 _color32;
         void* _ptr;
+        char _chars[8];
     };
 } py_TValue;

+ 1 - 1
include/pocketpy/objects/namedict.h

@@ -14,7 +14,7 @@
 
 /* A simple binary tree for storing modules. */
 typedef struct ModuleDict {
-    const char* path;
+    char path[PK_MAX_MODULE_PATH_LEN + 1];
     py_TValue module;
     struct ModuleDict* left;
     struct ModuleDict* right;

+ 1 - 1
src/common/sstream.c

@@ -11,7 +11,7 @@
 
 void c11_sbuf__ctor(c11_sbuf* self) {
     c11_vector__ctor(&self->data, sizeof(char));
-    c11_vector__reserve(&self->data, sizeof(c11_string) + 100);
+    c11_vector__reserve(&self->data, sizeof(c11_string) + 64);
     self->data.length = sizeof(c11_string);
 }
 

+ 4 - 16
src/compiler/compiler.c

@@ -63,7 +63,6 @@ typedef struct Ctx {
     bool is_compiling_class;
     c11_vector /*T=Expr* */ s_expr;
     c11_smallmap_n2i global_names;
-    c11_smallmap_s2n co_consts_string_dedup_map;
 } Ctx;
 
 typedef struct Expr Expr;
@@ -1081,7 +1080,6 @@ static void Ctx__ctor(Ctx* self, CodeObject* co, FuncDecl* func, int level) {
     self->is_compiling_class = false;
     c11_vector__ctor(&self->s_expr, sizeof(Expr*));
     c11_smallmap_n2i__ctor(&self->global_names);
-    c11_smallmap_s2n__ctor(&self->co_consts_string_dedup_map);
 }
 
 static void Ctx__dtor(Ctx* self) {
@@ -1091,7 +1089,6 @@ static void Ctx__dtor(Ctx* self) {
     }
     c11_vector__dtor(&self->s_expr);
     c11_smallmap_n2i__dtor(&self->global_names);
-    c11_smallmap_s2n__dtor(&self->co_consts_string_dedup_map);
 }
 
 static int Ctx__prepare_loop_divert(Ctx* self, int line, bool is_break) {
@@ -1201,19 +1198,10 @@ static int Ctx__add_varname(Ctx* self, py_Name name) {
 }
 
 static int Ctx__add_const_string(Ctx* self, c11_sv key) {
-    uint16_t* val = c11_smallmap_s2n__try_get(&self->co_consts_string_dedup_map, key);
-    if(val) {
-        return *val;
-    } else {
-        py_TValue tmp;
-        py_newstrv(&tmp, key);
-        c11_vector__push(py_TValue, &self->co->consts, tmp);
-        int index = self->co->consts.length - 1;
-        c11_smallmap_s2n__set(&self->co_consts_string_dedup_map,
-                              c11_string__sv(PyObject__userdata(tmp._obj)),
-                              index);
-        return index;
-    }
+    py_Ref p = c11_vector__emplace(&self->co->consts);
+    py_newstrv(p, key);
+    int index = self->co->consts.length - 1;
+    return index;
 }
 
 static int Ctx__add_const(Ctx* self, py_Ref v) {

+ 1 - 1
src/interpreter/heap.c

@@ -44,7 +44,7 @@ void ManagedHeap__collect_if_needed(ManagedHeap* self) {
     const int lower = PK_GC_MIN_THRESHOLD / 2;
     float free_ratio = (float)avg_freed / self->gc_threshold;
     int new_threshold = self->gc_threshold * (1 / free_ratio);
-    // printf("gc_threshold=%d, avg_freed=%d, new_threshold=%d\n", self->gc_threshold, avg_freed, new_threshold);
+    printf("gc_threshold=%d, avg_freed=%d, new_threshold=%d\n", self->gc_threshold, avg_freed, new_threshold);
     self->gc_threshold = c11__min(c11__max(new_threshold, lower), upper);
 }
 

+ 1 - 11
src/interpreter/vm.c

@@ -59,7 +59,7 @@ void VM__ctor(VM* self) {
     self->top_frame = NULL;
     InternedNames__ctor(&self->names);
 
-    ModuleDict__ctor(&self->modules, NULL, *py_NIL());
+    ModuleDict__ctor(&self->modules, "", *py_NIL());
     TypeList__ctor(&self->types);
 
     self->builtins = *py_NIL();
@@ -88,12 +88,6 @@ void VM__ctor(VM* self) {
     ValueStack__ctor(&self->stack);
 
     /* Init Builtin Types */
-    for(int i = 0; i < 128; i++) {
-        char* p = py_newstrn(&self->ascii_literals[i], 1);
-        *p = i;
-    }
-    py_newstrn(&self->ascii_literals[128], 0);  // empty string
-
     // 0: unused
     void* placeholder = TypeList__emplace(&self->types);
     memset(placeholder, 0, sizeof(py_TypeInfo));
@@ -634,10 +628,6 @@ void ManagedHeap__mark(ManagedHeap* self) {
     for(py_TValue* p = vm->stack.begin; p != vm->stack.end; p++) {
         pk__mark_value(p);
     }
-    // mark ascii literals
-    for(int i = 0; i < c11__count_array(vm->ascii_literals); i++) {
-        pk__mark_value(&vm->ascii_literals[i]);
-    }
     // mark modules
     ModuleDict__apply_mark(&vm->modules, p_stack);
     // mark types

+ 16 - 10
src/objects/namedict.c

@@ -9,7 +9,11 @@
 #undef SMALLMAP_T__SOURCE
 
 void ModuleDict__ctor(ModuleDict* self, const char* path, py_TValue module) {
-    self->path = path;
+    assert(path != NULL);
+    int length = strlen(path);
+    assert(length <= PK_MAX_MODULE_PATH_LEN);
+    memcpy(self->path, path, length);
+    self->path[length] = '\0';
     self->module = module;
     self->left = NULL;
     self->right = NULL;
@@ -27,10 +31,7 @@ void ModuleDict__dtor(ModuleDict* self) {
 }
 
 void ModuleDict__set(ModuleDict* self, const char* key, py_TValue val) {
-    if(self->path == NULL) {
-        self->path = key;
-        self->module = val;
-    }
+    assert(key != NULL);
     int cmp = strcmp(key, self->path);
     if(cmp < 0) {
         if(self->left) {
@@ -52,7 +53,7 @@ void ModuleDict__set(ModuleDict* self, const char* key, py_TValue val) {
 }
 
 py_TValue* ModuleDict__try_get(ModuleDict* self, const char* path) {
-    if(self->path == NULL) return NULL;
+    assert(path != NULL);
     int cmp = strcmp(path, self->path);
     if(cmp < 0) {
         if(self->left) {
@@ -72,14 +73,19 @@ py_TValue* ModuleDict__try_get(ModuleDict* self, const char* path) {
 }
 
 bool ModuleDict__contains(ModuleDict* self, const char* path) {
+    assert(path != NULL);
     return ModuleDict__try_get(self, path) != NULL;
 }
 
 void ModuleDict__apply_mark(ModuleDict* self, c11_vector* p_stack) {
-    PyObject* obj = self->module._obj;
-    if(!obj->gc_marked) {
-        obj->gc_marked = true;
-        c11_vector__push(PyObject*, p_stack, obj);
+    if(!py_isnil(&self->module)) {
+        // root node is dummy
+        PyObject* obj = self->module._obj;
+        assert(obj != NULL);
+        if(!obj->gc_marked) {
+            obj->gc_marked = true;
+            c11_vector__push(PyObject*, p_stack, obj);
+        }
     }
     if(self->left) ModuleDict__apply_mark(self->left, p_stack);
     if(self->right) ModuleDict__apply_mark(self->right, p_stack);

+ 5 - 9
src/public/modules.c

@@ -450,15 +450,11 @@ static bool builtins_chr(int argc, py_Ref argv) {
     PY_CHECK_ARGC(1);
     PY_CHECK_ARG_TYPE(0, tp_int);
     uint32_t val = py_toint(py_arg(0));
-    if(val >= 0 && val < 128) {
-        py_assign(py_retval(), &pk_current_vm->ascii_literals[val]);
-    } else {
-        // convert to utf-8
-        char utf8[4];
-        int len = c11__u32_to_u8(val, utf8);
-        if(len == -1) return ValueError("invalid unicode code point: %d", val);
-        py_newstrv(py_retval(), (c11_sv){utf8, len});
-    }
+    // convert to utf-8
+    char utf8[4];
+    int len = c11__u32_to_u8(val, utf8);
+    if(len == -1) return ValueError("invalid unicode code point: %d", val);
+    py_newstrv(py_retval(), (c11_sv){utf8, len});
     return true;
 }
 

+ 7 - 0
src/public/py_ops.c

@@ -9,6 +9,13 @@ bool py_isidentical(py_Ref lhs, py_Ref rhs) {
         case tp_int: return lhs->_i64 == rhs->_i64;
         case tp_float: return lhs->_f64 == rhs->_f64;
         case tp_bool: return lhs->_bool == rhs->_bool;
+        case tp_str: {
+            if(lhs->is_ptr && rhs->is_ptr) {
+                return lhs->_obj == rhs->_obj;
+            } else {
+                return strcmp(lhs->_chars, rhs->_chars) == 0;
+            }
+        }
         case tp_nativefunc: return lhs->_cfunc == rhs->_cfunc;
         case tp_NoneType: return true;
         case tp_NotImplementedType: return true;

+ 48 - 49
src/public/py_str.c

@@ -9,6 +9,13 @@
 void py_newstr(py_Ref out, const char* data) { py_newstrv(out, (c11_sv){data, strlen(data)}); }
 
 char* py_newstrn(py_Ref out, int size) {
+    if(size < 8) {
+        out->type = tp_str;
+        out->is_ptr = false;
+        c11_string* ud = (c11_string*)(&out->extra);
+        c11_string__ctor3(ud, size);
+        return ud->data;
+    }
     ManagedHeap* heap = &pk_current_vm->heap;
     int total_size = sizeof(c11_string) + size + 1;
     PyObject* obj = ManagedHeap__gcnew(heap, tp_str, 0, total_size);
@@ -21,17 +28,6 @@ char* py_newstrn(py_Ref out, int size) {
 }
 
 void py_newstrv(py_OutRef out, c11_sv sv) {
-    if(sv.size == 0) {
-        *out = pk_current_vm->ascii_literals[128];
-        return;
-    }
-    if(sv.size == 1) {
-        int c = sv.data[0];
-        if(c >= 0 && c < 128) {
-            *out = pk_current_vm->ascii_literals[c];
-            return;
-        }
-    }
     char* data = py_newstrn(out, sv.size);
     memcpy(data, sv.data, sv.size);
 }
@@ -58,22 +54,25 @@ unsigned char* py_newbytes(py_Ref out, int size) {
     return ud->data;
 }
 
-const char* py_tostr(py_Ref self) {
+c11_string* pk_tostr(py_Ref self) {
     assert(self->type == tp_str);
-    c11_string* ud = PyObject__userdata(self->_obj);
-    return ud->data;
+    if(!self->is_ptr) {
+        return (c11_string*)(&self->extra);
+    } else {
+        return PyObject__userdata(self->_obj);
+    }
 }
 
+const char* py_tostr(py_Ref self) { return pk_tostr(self)->data; }
+
 const char* py_tostrn(py_Ref self, int* size) {
-    assert(self->type == tp_str);
-    c11_string* ud = PyObject__userdata(self->_obj);
+    c11_string* ud = pk_tostr(self);
     *size = ud->size;
     return ud->data;
 }
 
 c11_sv py_tosv(py_Ref self) {
-    assert(self->type == tp_str);
-    c11_string* ud = PyObject__userdata(self->_obj);
+    c11_string* ud = pk_tostr(self);
     return c11_string__sv(ud);
 }
 
@@ -116,18 +115,18 @@ static bool str__hash__(int argc, py_Ref argv) {
 
 static bool str__len__(int argc, py_Ref argv) {
     PY_CHECK_ARGC(1);
-    c11_string* self = py_touserdata(&argv[0]);
+    c11_string* self = pk_tostr(&argv[0]);
     py_newint(py_retval(), c11_sv__u8_length((c11_sv){self->data, self->size}));
     return true;
 }
 
 static bool str__add__(int argc, py_Ref argv) {
     PY_CHECK_ARGC(2);
-    c11_string* self = py_touserdata(&argv[0]);
+    c11_string* self = pk_tostr(&argv[0]);
     if(py_arg(1)->type != tp_str) {
         py_newnotimplemented(py_retval());
     } else {
-        c11_string* other = py_touserdata(&argv[1]);
+        c11_string* other = pk_tostr(&argv[1]);
         char* p = py_newstrn(py_retval(), self->size + other->size);
         memcpy(p, self->data, self->size);
         memcpy(p + self->size, other->data, other->size);
@@ -137,7 +136,7 @@ static bool str__add__(int argc, py_Ref argv) {
 
 static bool str__mul__(int argc, py_Ref argv) {
     PY_CHECK_ARGC(2);
-    c11_string* self = py_touserdata(&argv[0]);
+    c11_string* self = pk_tostr(&argv[0]);
     if(py_arg(1)->type != tp_int) {
         py_newnotimplemented(py_retval());
     } else {
@@ -158,11 +157,11 @@ static bool str__rmul__(int argc, py_Ref argv) { return str__mul__(argc, argv);
 
 static bool str__contains__(int argc, py_Ref argv) {
     PY_CHECK_ARGC(2);
-    c11_string* self = py_touserdata(&argv[0]);
+    c11_string* self = pk_tostr(&argv[0]);
     if(py_arg(1)->type != tp_str) {
         py_newnotimplemented(py_retval());
     } else {
-        c11_string* other = py_touserdata(&argv[1]);
+        c11_string* other = pk_tostr(&argv[1]);
         const char* p = strstr(self->data, other->data);
         py_newbool(py_retval(), p != NULL);
     }
@@ -194,7 +193,7 @@ static bool str__iter__(int argc, py_Ref argv) {
 
 static bool str__getitem__(int argc, py_Ref argv) {
     PY_CHECK_ARGC(2);
-    c11_sv self = c11_string__sv(py_touserdata(&argv[0]));
+    c11_sv self = c11_string__sv(pk_tostr(&argv[0]));
     py_Ref _1 = py_arg(1);
     if(_1->type == tp_int) {
         int index = py_toint(py_arg(1));
@@ -218,11 +217,11 @@ static bool str__getitem__(int argc, py_Ref argv) {
 #define DEF_STR_CMP_OP(op, __f, __cond)                                                            \
     static bool str##op(int argc, py_Ref argv) {                                                   \
         PY_CHECK_ARGC(2);                                                                          \
-        c11_string* self = py_touserdata(&argv[0]);                                                \
+        c11_string* self = pk_tostr(&argv[0]);                                                     \
         if(py_arg(1)->type != tp_str) {                                                            \
             py_newnotimplemented(py_retval());                                                     \
         } else {                                                                                   \
-            c11_string* other = py_touserdata(&argv[1]);                                           \
+            c11_string* other = pk_tostr(&argv[1]);                                                \
             int res = __f(c11_string__sv(self), c11_string__sv(other));                            \
             py_newbool(py_retval(), __cond);                                                       \
         }                                                                                          \
@@ -240,7 +239,7 @@ DEF_STR_CMP_OP(__ge__, c11_sv__cmp, res >= 0)
 
 static bool str_lower(int argc, py_Ref argv) {
     PY_CHECK_ARGC(1);
-    c11_string* self = py_touserdata(&argv[0]);
+    c11_string* self = pk_tostr(&argv[0]);
     char* p = py_newstrn(py_retval(), self->size);
     for(int i = 0; i < self->size; i++) {
         char c = self->data[i];
@@ -251,7 +250,7 @@ static bool str_lower(int argc, py_Ref argv) {
 
 static bool str_upper(int argc, py_Ref argv) {
     PY_CHECK_ARGC(1);
-    c11_string* self = py_touserdata(&argv[0]);
+    c11_string* self = pk_tostr(&argv[0]);
     char* p = py_newstrn(py_retval(), self->size);
     for(int i = 0; i < self->size; i++) {
         char c = self->data[i];
@@ -262,25 +261,25 @@ static bool str_upper(int argc, py_Ref argv) {
 
 static bool str_startswith(int argc, py_Ref argv) {
     PY_CHECK_ARGC(2);
-    c11_string* self = py_touserdata(&argv[0]);
+    c11_string* self = pk_tostr(&argv[0]);
     PY_CHECK_ARG_TYPE(1, tp_str);
-    c11_string* other = py_touserdata(&argv[1]);
+    c11_string* other = pk_tostr(&argv[1]);
     py_newbool(py_retval(), c11_sv__startswith(c11_string__sv(self), c11_string__sv(other)));
     return true;
 }
 
 static bool str_endswith(int argc, py_Ref argv) {
     PY_CHECK_ARGC(2);
-    c11_string* self = py_touserdata(&argv[0]);
+    c11_string* self = pk_tostr(&argv[0]);
     PY_CHECK_ARG_TYPE(1, tp_str);
-    c11_string* other = py_touserdata(&argv[1]);
+    c11_string* other = pk_tostr(&argv[1]);
     py_newbool(py_retval(), c11_sv__endswith(c11_string__sv(self), c11_string__sv(other)));
     return true;
 }
 
 static bool str_join(int argc, py_Ref argv) {
     PY_CHECK_ARGC(2);
-    c11_sv self = c11_string__sv(py_touserdata(argv));
+    c11_sv self = c11_string__sv(pk_tostr(argv));
 
     if(!py_iter(py_arg(1))) return false;
     py_push(py_retval());  // iter
@@ -302,7 +301,7 @@ static bool str_join(int argc, py_Ref argv) {
             c11_sbuf__dtor(&buf);
             return false;
         }
-        c11_string* item = py_touserdata(py_retval());
+        c11_string* item = pk_tostr(py_retval());
         c11_sbuf__write_cstrn(&buf, item->data, item->size);
         first = false;
     }
@@ -314,11 +313,11 @@ static bool str_join(int argc, py_Ref argv) {
 
 static bool str_replace(int argc, py_Ref argv) {
     PY_CHECK_ARGC(3);
-    c11_string* self = py_touserdata(&argv[0]);
+    c11_string* self = pk_tostr(&argv[0]);
     PY_CHECK_ARG_TYPE(1, tp_str);
     PY_CHECK_ARG_TYPE(2, tp_str);
-    c11_string* old = py_touserdata(&argv[1]);
-    c11_string* new_ = py_touserdata(&argv[2]);
+    c11_string* old = pk_tostr(&argv[1]);
+    c11_string* new_ = pk_tostr(&argv[2]);
     c11_string* res =
         c11_sv__replace2(c11_string__sv(self), c11_string__sv(old), c11_string__sv(new_));
     py_newstrv(py_retval(), (c11_sv){res->data, res->size});
@@ -327,7 +326,7 @@ static bool str_replace(int argc, py_Ref argv) {
 }
 
 static bool str_split(int argc, py_Ref argv) {
-    c11_sv self = c11_string__sv(py_touserdata(&argv[0]));
+    c11_sv self = c11_string__sv(pk_tostr(&argv[0]));
     c11_vector res;
     bool discard_empty = false;
     if(argc > 2) return TypeError("split() takes at most 2 arguments");
@@ -339,7 +338,7 @@ static bool str_split(int argc, py_Ref argv) {
     if(argc == 2) {
         // sep = argv[1]
         if(!py_checkstr(&argv[1])) return false;
-        c11_sv sep = c11_string__sv(py_touserdata(&argv[1]));
+        c11_sv sep = c11_string__sv(pk_tostr(&argv[1]));
         if(sep.size == 0) return ValueError("empty separator");
         res = c11_sv__split2(self, sep);
     }
@@ -355,22 +354,22 @@ static bool str_split(int argc, py_Ref argv) {
 
 static bool str_count(int argc, py_Ref argv) {
     PY_CHECK_ARGC(2);
-    c11_string* self = py_touserdata(&argv[0]);
+    c11_string* self = pk_tostr(&argv[0]);
     PY_CHECK_ARG_TYPE(1, tp_str);
-    c11_string* sub = py_touserdata(&argv[1]);
+    c11_string* sub = pk_tostr(&argv[1]);
     int res = c11_sv__count(c11_string__sv(self), c11_string__sv(sub));
     py_newint(py_retval(), res);
     return true;
 }
 
 static bool str__strip_impl(bool left, bool right, int argc, py_Ref argv) {
-    c11_sv self = c11_string__sv(py_touserdata(&argv[0]));
+    c11_sv self = c11_string__sv(pk_tostr(&argv[0]));
     c11_sv chars;
     if(argc == 1) {
         chars = (c11_sv){" \t\n\r", 4};
     } else if(argc == 2) {
         if(!py_checkstr(&argv[1])) return false;
-        chars = c11_string__sv(py_touserdata(&argv[1]));
+        chars = c11_string__sv(pk_tostr(&argv[1]));
     } else {
         return TypeError("strip() takes at most 2 arguments");
     }
@@ -387,7 +386,7 @@ static bool str_rstrip(int argc, py_Ref argv) { return str__strip_impl(false, tr
 
 static bool str_zfill(int argc, py_Ref argv) {
     PY_CHECK_ARGC(2);
-    c11_sv self = c11_string__sv(py_touserdata(&argv[0]));
+    c11_sv self = c11_string__sv(pk_tostr(&argv[0]));
     PY_CHECK_ARG_TYPE(1, tp_int);
     int width = py_toint(py_arg(1));
     int delta = width - c11_sv__u8_length(self);
@@ -412,12 +411,12 @@ static bool str__widthjust_impl(bool left, int argc, py_Ref argv) {
         pad = ' ';
     } else {
         if(!py_checkstr(&argv[2])) return false;
-        c11_string* padstr = py_touserdata(&argv[2]);
+        c11_string* padstr = pk_tostr(&argv[2]);
         if(padstr->size != 1)
             return TypeError("The fill character must be exactly one character long");
         pad = padstr->data[0];
     }
-    c11_sv self = c11_string__sv(py_touserdata(&argv[0]));
+    c11_sv self = c11_string__sv(pk_tostr(&argv[0]));
     PY_CHECK_ARG_TYPE(1, tp_int);
     int width = py_toint(py_arg(1));
     if(width <= self.size) {
@@ -452,9 +451,9 @@ static bool str_find(int argc, py_Ref argv) {
         PY_CHECK_ARG_TYPE(2, tp_int);
         start = py_toint(py_arg(2));
     }
-    c11_string* self = py_touserdata(&argv[0]);
+    c11_string* self = pk_tostr(&argv[0]);
     PY_CHECK_ARG_TYPE(1, tp_str);
-    c11_string* sub = py_touserdata(&argv[1]);
+    c11_string* sub = pk_tostr(&argv[1]);
     int res = c11_sv__index2(c11_string__sv(self), c11_string__sv(sub), start);
     py_newint(py_retval(), res);
     return true;

+ 1 - 1
tests/04_str.py

@@ -190,7 +190,7 @@ assert (1 != '1') is True
 assert (1 == '1') is False
 assert 1 == 1.0
 
-assert chr(97) is 'a'
+assert chr(97) == 'a'
 assert ord('a') == 97
 
 assert ord('🥕') == 0x1f955