blueloveTH 1 год назад
Родитель
Сommit
d74ca31f68

+ 2 - 3
include/pocketpy/common/str.h

@@ -32,7 +32,8 @@ int c11_sv__cmp(c11_sv self, c11_sv other);
 int c11_sv__cmp2(c11_sv self, const char* other);
 
 bool c11__streq(const char* a, const char* b);
-bool c11__sveq(c11_sv a, const char* b);
+bool c11__sveq(c11_sv a, c11_sv b);
+bool c11__sveq2(c11_sv a, const char* b);
 
 c11_string* c11_string__new(const char* data);
 c11_string* c11_string__new2(const char* data, int size);
@@ -48,8 +49,6 @@ c11_sv c11_string__u8_getitem(c11_string* self, int i);
 c11_string* c11_string__u8_slice(c11_string* self, int start, int stop, int step);
 
 // general string operations
-void c11_sv__lower(c11_sv sv, c11_vector* buf);
-void c11_sv__upper(c11_sv sv, c11_vector* buf);
 c11_sv c11_sv__slice(c11_sv sv, int start);
 c11_sv c11_sv__slice2(c11_sv sv, int start, int stop);
 c11_sv c11_sv__strip(c11_sv sv, bool left, bool right);

+ 2 - 2
include/pocketpy/pocketpy.h

@@ -92,7 +92,7 @@ void py_newnativefunc(py_Ref out, py_CFunction);
 /// @param type type of the object.
 /// @param slots number of slots. Use -1 to create a `__dict__`.
 /// @param udsize size of your userdata. You can use `py_touserdata()` to get the pointer to it.
-void py_newobject(py_Ref out, py_Type type, int slots, int udsize);
+void* py_newobject(py_Ref out, py_Type type, int slots, int udsize);
 /************* Type Cast *************/
 py_i64 py_toint(const py_Ref);
 py_f64 py_tofloat(const py_Ref);
@@ -120,7 +120,7 @@ bool py_issubclass(py_Type derived, py_Type base);
 
 #define PY_CHECK_ARG_TYPE(i, type)   if(!py_checktype(py_arg(i), type)) return false
 
-#define py_offset(p, i) (py_Ref)((char*)p + ((i) << 4))
+/// Address of the `i`-th slot relative to `p`, using a fixed stride of 16 bytes.
+/// Both arguments are parenthesized so expression arguments expand correctly, and the
+/// stride is applied by multiplication because left-shifting a negative index is undefined.
+#define py_offset(p, i) ((py_Ref)((char*)(p) + ((i) * 16)))
 #define py_arg(i) py_offset(argv, i)
 
 py_GlobalRef py_tpmagic(py_Type type, py_Name name);

+ 14 - 23
src/common/str.c

@@ -69,26 +69,12 @@ c11_string* c11_string__u8_slice(c11_string* self, int start, int stop, int step
 }
 
 /////////////////////////////////////////
-void c11_sv__lower(c11_sv sv, c11_vector* buf) {
-    for(int i = 0; i < sv.size; i++) {
-        char c = sv.data[i];
-        if('A' <= c && c <= 'Z') c += 32;
-        c11_vector__push(char, buf, c);
-    }
-}
-
-void c11_sv__upper(c11_sv sv, c11_vector* buf) {
-    for(int i = 0; i < sv.size; i++) {
-        char c = sv.data[i];
-        if('a' <= c && c <= 'z') c -= 32;
-        c11_vector__push(char, buf, c);
-    }
-}
-
 c11_sv c11_sv__slice(c11_sv sv, int start) { return c11_sv__slice2(sv, start, sv.size); }
 
 c11_sv c11_sv__slice2(c11_sv sv, int start, int stop) {
+    // Returns the sub-view [start, stop) of `sv`.
+    // `start` is clamped into [0, sv.size] and `stop` into [start, sv.size], so the
+    // result never points outside `sv` and never has a negative size.
+    if(start < 0) start = 0;
+    if(start > sv.size) start = sv.size;
+    if(stop > sv.size) stop = sv.size;
     if(stop < start) stop = start;
     return (c11_sv){sv.data + start, stop - start};
 }
 
@@ -211,7 +197,12 @@ int c11_sv__cmp2(c11_sv self, const char* other) {
 
 bool c11__streq(const char* a, const char* b) { return strcmp(a, b) == 0; }
 
-bool c11__sveq(c11_sv a, const char* b) {
+// Returns true when both views have the same size and identical bytes.
+bool c11__sveq(c11_sv a, c11_sv b) {
+    return a.size == b.size && memcmp(a.data, b.data, a.size) == 0;
+}
+
+bool c11__sveq2(c11_sv a, const char* b) {
     int size = strlen(b);
     if(a.size != size) return false;
     return memcmp(a.data, b, size) == 0;
@@ -250,11 +241,11 @@ IntParsingResult c11__parse_uint(c11_sv text, int64_t* out, int base) {
 
     c11_sv prefix = {.data = text.data, .size = c11__min(2, text.size)};
     if(base == -1) {
-        if(c11__sveq(prefix, "0b"))
+        if(c11__sveq2(prefix, "0b"))
             base = 2;
-        else if(c11__sveq(prefix, "0o"))
+        else if(c11__sveq2(prefix, "0o"))
             base = 8;
-        else if(c11__sveq(prefix, "0x"))
+        else if(c11__sveq2(prefix, "0x"))
             base = 16;
         else
             base = 10;
@@ -276,7 +267,7 @@ IntParsingResult c11__parse_uint(c11_sv text, int64_t* out, int base) {
         return IntParsing_SUCCESS;
     } else if(base == 2) {
         // 2-base   0b101010
-        if(c11__sveq(prefix, "0b")) {
+        if(c11__sveq2(prefix, "0b")) {
             // text.remove_prefix(2);
             text = (c11_sv){text.data + 2, text.size - 2};
         }
@@ -294,7 +285,7 @@ IntParsingResult c11__parse_uint(c11_sv text, int64_t* out, int base) {
         return IntParsing_SUCCESS;
     } else if(base == 8) {
         // 8-base   0o123
-        if(c11__sveq(prefix, "0o")) {
+        if(c11__sveq2(prefix, "0o")) {
             // text.remove_prefix(2);
             text = (c11_sv){text.data + 2, text.size - 2};
         }
@@ -312,7 +303,7 @@ IntParsingResult c11__parse_uint(c11_sv text, int64_t* out, int base) {
         return IntParsing_SUCCESS;
     } else if(base == 16) {
         // 16-base  0x123
-        if(c11__sveq(prefix, "0x")) {
+        if(c11__sveq2(prefix, "0x")) {
             // text.remove_prefix(2);
             text = (c11_sv){text.data + 2, text.size - 2};
         }

+ 4 - 4
src/compiler/lexer.c

@@ -246,11 +246,11 @@ static Error* eat_name(pk_Lexer* self){
     c11_sv name = {self->token_start, length};
 
     if(self->src->mode == JSON_MODE) {
-        if(c11__sveq(name, "true")) {
+        if(c11__sveq2(name, "true")) {
             add_token(self, TK_TRUE);
-        } else if(c11__sveq(name, "false")) {
+        } else if(c11__sveq2(name, "false")) {
             add_token(self, TK_FALSE);
-        } else if(c11__sveq(name, "null")) {
+        } else if(c11__sveq2(name, "null")) {
             add_token(self, TK_NONE);
         } else {
             return SyntaxError("invalid JSON token");
@@ -265,7 +265,7 @@ static Error* eat_name(pk_Lexer* self){
     c11__lower_bound(const char*, KW_BEGIN, KW_COUNT, name, less, &out);
     #undef less
 
-    if(out != KW_COUNT && c11__sveq(name, KW_BEGIN[out])) {
+    if(out != KW_COUNT && c11__sveq2(name, KW_BEGIN[out])) {
         add_token(self, (TokenIndex)(out + TK_FALSE));
     } else {
         add_token(self, TK_ID);

+ 0 - 0
src/interpreter/py_number.c → src/public/py_number.c


+ 232 - 17
src/public/py_str.c

@@ -4,24 +4,9 @@
 #include "pocketpy/common/utils.h"
 #include "pocketpy/objects/object.h"
 #include "pocketpy/interpreter/vm.h"
+#include "pocketpy/common/sstream.h"
 
-py_Type pk_str__register() {
-    pk_VM* vm = pk_current_vm;
-    py_Type type = pk_VM__new_type(vm, "str", tp_object, NULL, false);
-    // no need to dtor because the memory is controlled by the object
-    return type;
-}
-
-py_Type pk_bytes__register() {
-    pk_VM* vm = pk_current_vm;
-    py_Type type = pk_VM__new_type(vm, "bytes", tp_object, NULL, false);
-    // no need to dtor because the memory is controlled by the object
-    return type;
-}
-
-void py_newstr(py_Ref out, const char* data) {
-    return py_newstrn(out, data, strlen(data));
-}
+/// Creates a `str` in `out` from the NUL-terminated C string `data`.
+/// The call is a plain statement: a `return` with an expression is not permitted in a
+/// function whose return type is `void`.
+void py_newstr(py_Ref out, const char* data) { py_newstrn(out, data, (int)strlen(data)); }
 
 void py_newstrn(py_Ref out, const char* data, int size) {
     pk_ManagedHeap* heap = &pk_current_vm->heap;
@@ -66,3 +51,233 @@ unsigned char* py_tobytes(const py_Ref self, int* size) {
     return ud->data;
 }
 
+////////////////////////////////
+
+/// `str.__new__` placeholder: does no work and reports success.
+/// NOTE(review): nothing is written to `py_retval()` here; confirm whether a real
+/// constructor is still pending.
+static bool _py_str__new__(int argc, py_Ref argv) {
+    return true;
+}
+
+/// `str.__hash__`: polynomial (base 31) hash over the bytes of the string, stored as an
+/// int in `py_retval()`.
+static bool _py_str__hash__(int argc, py_Ref argv) {
+    PY_CHECK_ARGC(1);
+    int size;
+    const char* data = py_tostrn(&argv[0], &size);
+    // Accumulate in an unsigned type: unsigned arithmetic wraps around, whereas
+    // overflowing a signed `py_i64` is undefined behavior.
+    unsigned long long hash = 0;
+    for(int i = 0; i < size; i++) {
+        // Read each byte as unsigned so the result does not depend on whether plain
+        // `char` is signed on the target platform.
+        hash = hash * 31 + (unsigned char)data[i];
+    }
+    py_newint(py_retval(), (py_i64)hash);
+    return true;
+}
+
+/// `str.__len__`: stores the `size` field of the string's userdata as an int result.
+static bool _py_str__len__(int argc, py_Ref argv) {
+    PY_CHECK_ARGC(1);
+    c11_string* str = py_touserdata(argv);
+    py_newint(py_retval(), str->size);
+    return true;
+}
+
+/// `str.__add__`: concatenates two strings into a newly created `str`.
+/// Produces `NotImplemented` when the right operand is not a `str`.
+static bool _py_str__add__(int argc, py_Ref argv) {
+    PY_CHECK_ARGC(2);
+    c11_string* lhs = py_touserdata(&argv[0]);
+    if(py_arg(1)->type != tp_str) {
+        py_newnotimplemented(py_retval());
+        return true;
+    }
+    c11_string* rhs = py_touserdata(&argv[1]);
+    // header + both payloads + trailing NUL
+    int alloc_size = sizeof(c11_string) + lhs->size + rhs->size + 1;
+    c11_string* joined = py_newobject(py_retval(), tp_str, 0, alloc_size);
+    joined->size = lhs->size + rhs->size;
+    char* dst = (char*)joined->data;
+    memcpy(dst, lhs->data, lhs->size);
+    memcpy(dst + lhs->size, rhs->data, rhs->size);
+    dst[joined->size] = '\0';
+    return true;
+}
+
+/// `str.__mul__`: repeats the string `n` times into a newly created `str`.
+/// Produces `NotImplemented` when the right operand is not an `int`, and an empty
+/// string when `n <= 0` or when the string itself is empty.
+static bool _py_str__mul__(int argc, py_Ref argv) {
+    PY_CHECK_ARGC(2);
+    c11_string* self = py_touserdata(&argv[0]);
+    if(py_arg(1)->type != tp_int) {
+        py_newnotimplemented(py_retval());
+        return true;
+    }
+    py_i64 n = py_toint(py_arg(1));
+    if(n <= 0 || self->size == 0) {
+        // Nothing to copy; this also keeps an empty string from looping `n` times.
+        py_newstr(py_retval(), "");
+        return true;
+    }
+    // NOTE(review): `self->size * n` is narrowed to `int` here and can still overflow
+    // for very large `n`; a range check that raises an error should be added.
+    int total_size = sizeof(c11_string) + self->size * n + 1;
+    c11_string* res = py_newobject(py_retval(), tp_str, 0, total_size);
+    res->size = self->size * n;
+    char* p = (char*)res->data;
+    // A 64-bit counter and a running pointer keep both the loop index and the offset
+    // computation from overflowing `int`.
+    char* dst = p;
+    for(py_i64 i = 0; i < n; i++) {
+        memcpy(dst, self->data, self->size);
+        dst += self->size;
+    }
+    p[res->size] = '\0';
+    return true;
+}
+
+/// `str.__rmul__`: forwards to `_py_str__mul__` with the arguments unchanged.
+static bool _py_str__rmul__(int argc, py_Ref argv) {
+    return _py_str__mul__(argc, argv);
+}
+
+/// `str.__contains__`: reports whether the right operand occurs as a substring.
+/// Produces `NotImplemented` when the right operand is not a `str`.
+static bool _py_str__contains__(int argc, py_Ref argv) {
+    PY_CHECK_ARGC(2);
+    c11_string* self = py_touserdata(&argv[0]);
+    if(py_arg(1)->type != tp_str) {
+        py_newnotimplemented(py_retval());
+        return true;
+    }
+    c11_string* other = py_touserdata(&argv[1]);
+    // Search by explicit length instead of `strstr`, which stops at the first NUL byte
+    // and would miss matches located after an embedded '\0'.
+    bool found = other->size == 0;  // the empty string is contained in every string
+    for(int i = 0; !found && i <= self->size - other->size; i++) {
+        found = memcmp(self->data + i, other->data, other->size) == 0;
+    }
+    py_newbool(py_retval(), found);
+    return true;
+}
+
+/// `str.__str__`: copies the receiver (`argv[0]`) into the return slot unchanged.
+static bool _py_str__str__(int argc, py_Ref argv) {
+    PY_CHECK_ARGC(1);
+    *py_retval() = *argv;
+    return true;
+}
+
+/// `str.__repr__` is not implemented yet: after the argument-count check it hits
+/// `assert(false)`, and returns false when assertions are compiled out.
+static bool _py_str__repr__(int argc, py_Ref argv) {
+    PY_CHECK_ARGC(1);
+    assert(false);
+    return false;
+}
+
+/// `str.__iter__` is not implemented yet: after the argument-count check it hits
+/// `assert(false)`, and returns false when assertions are compiled out.
+static bool _py_str__iter__(int argc, py_Ref argv) {
+    PY_CHECK_ARGC(1);
+    assert(false);
+    return false;
+}
+
+/// `str.__getitem__`: requires an `int` index, looks it up with
+/// `c11_string__u8_getitem`, and returns the resulting view as a new `str`.
+static bool _py_str__getitem__(int argc, py_Ref argv) {
+    PY_CHECK_ARGC(2);
+    c11_string* str = py_touserdata(&argv[0]);
+    PY_CHECK_ARG_TYPE(1, tp_int);
+    py_i64 index = py_toint(py_arg(1));
+    c11_sv piece = c11_string__u8_getitem(str, index);
+    py_newstrn(py_retval(), piece.data, piece.size);
+    return true;
+}
+
+// Defines `_py_str<op>`, a binary comparison handler for `str`.
+// The handler three-way compares both operands with `c11_sv__cmp` into `res` and stores
+// `condition` (an expression over `res`) as a bool result. It produces `NotImplemented`
+// when the right operand is not a `str`.
+#define DEF_STR_CMP_OP(op, condition)                                                              \
+    static bool _py_str##op(int argc, py_Ref argv) {                                               \
+        PY_CHECK_ARGC(2);                                                                          \
+        c11_string* self = py_touserdata(&argv[0]);                                                \
+        if(py_arg(1)->type != tp_str) {                                                            \
+            py_newnotimplemented(py_retval());                                                     \
+        } else {                                                                                   \
+            c11_string* other = py_touserdata(&argv[1]);                                           \
+            int res = c11_sv__cmp(c11_string__sv(self), c11_string__sv(other));                    \
+            py_newbool(py_retval(), condition);                                                    \
+        }                                                                                          \
+        return true;                                                                               \
+    }
+
+// `res` is 0 when the strings are equal, so equality must test `res == 0`
+// (using `res` itself as the condition would invert `__eq__` and `__ne__`).
+DEF_STR_CMP_OP(__eq__, res == 0)
+DEF_STR_CMP_OP(__ne__, res != 0)
+DEF_STR_CMP_OP(__lt__, res < 0)
+DEF_STR_CMP_OP(__le__, res <= 0)
+DEF_STR_CMP_OP(__gt__, res > 0)
+DEF_STR_CMP_OP(__ge__, res >= 0)
+
+#undef DEF_STR_CMP_OP
+
+/// `str.lower`: returns a copy in which bytes 'A'..'Z' are shifted to lowercase; every
+/// other byte is copied unchanged.
+static bool _py_str__lower(int argc, py_Ref argv) {
+    PY_CHECK_ARGC(1);
+    c11_string* src = py_touserdata(&argv[0]);
+    // header + payload + trailing NUL
+    int alloc_size = sizeof(c11_string) + src->size + 1;
+    c11_string* dst = py_newobject(py_retval(), tp_str, 0, alloc_size);
+    dst->size = src->size;
+    char* out = (char*)dst->data;
+    for(int i = 0; i < src->size; i++) {
+        char ch = src->data[i];
+        if('A' <= ch && ch <= 'Z') ch += 32;
+        out[i] = ch;
+    }
+    out[dst->size] = '\0';
+    return true;
+}
+
+/// `str.upper`: returns a copy in which bytes 'a'..'z' are shifted to uppercase; every
+/// other byte is copied unchanged.
+static bool _py_str__upper(int argc, py_Ref argv) {
+    PY_CHECK_ARGC(1);
+    c11_string* src = py_touserdata(&argv[0]);
+    // header + payload + trailing NUL
+    int alloc_size = sizeof(c11_string) + src->size + 1;
+    c11_string* dst = py_newobject(py_retval(), tp_str, 0, alloc_size);
+    dst->size = src->size;
+    char* out = (char*)dst->data;
+    for(int i = 0; i < src->size; i++) {
+        char ch = src->data[i];
+        if('a' <= ch && ch <= 'z') ch -= 32;
+        out[i] = ch;
+    }
+    out[dst->size] = '\0';
+    return true;
+}
+
+/// `str.startswith`: compares the leading `prefix->size` bytes of the receiver with the
+/// `str` argument and returns the result as a bool.
+static bool _py_str__startswith(int argc, py_Ref argv) {
+    PY_CHECK_ARGC(2);
+    c11_string* self = py_touserdata(&argv[0]);
+    PY_CHECK_ARG_TYPE(1, tp_str);
+    c11_string* prefix = py_touserdata(&argv[1]);
+    c11_sv head = c11_sv__slice2(c11_string__sv(self), 0, prefix->size);
+    c11_sv wanted = c11_string__sv(prefix);
+    bool matches = c11__sveq(head, wanted);
+    py_newbool(py_retval(), matches);
+    return true;
+}
+
+/// `str.endswith`: compares the trailing `suffix->size` bytes of the receiver with the
+/// `str` argument and returns the result as a bool.
+static bool _py_str__endswith(int argc, py_Ref argv) {
+    PY_CHECK_ARGC(2);
+    c11_string* self = py_touserdata(&argv[0]);
+    PY_CHECK_ARG_TYPE(1, tp_str);
+    c11_string* suffix = py_touserdata(&argv[1]);
+    int tail_start = self->size - suffix->size;
+    c11_sv tail = c11_sv__slice2(c11_string__sv(self), tail_start, self->size);
+    c11_sv wanted = c11_string__sv(suffix);
+    bool matches = c11__sveq(tail, wanted);
+    py_newbool(py_retval(), matches);
+    return true;
+}
+
+/// `str.join` is not implemented yet: it hits `assert(false)` immediately and returns
+/// false when assertions are compiled out. The commented-out lines below are an
+/// unfinished sketch of the intended implementation.
+static bool _py_str__join(int argc, py_Ref argv) {
+    assert(false);
+    // PY_CHECK_ARGC(2);
+    // c11_sbuf buf;
+    // c11_sbuf__ctor(&buf);
+    // c11_string* sep = py_touserdata(&argv[0]);
+    // py_Ref iter = py_pushtmp();
+    // py_iter(iter, &argv[1]);
+    return false;
+}
+
+/// Creates the `str` type on the current VM, binds its magic methods and regular
+/// methods, and returns the `py_Type` produced by `pk_VM__new_type`.
+/// NOTE(review): all bindings target `tp_str` rather than the freshly created `type`;
+/// presumably the two are equal — confirm.
+py_Type pk_str__register() {
+    pk_VM* vm = pk_current_vm;
+    py_Type type = pk_VM__new_type(vm, "str", tp_object, NULL, false);
+    // no need to dtor because the memory is controlled by the object
+
+    // construction, hashing, arithmetic-style operators, conversion and indexing
+    py_bindmagic(tp_str, __new__, _py_str__new__);
+    py_bindmagic(tp_str, __hash__, _py_str__hash__);
+    py_bindmagic(tp_str, __len__, _py_str__len__);
+    py_bindmagic(tp_str, __add__, _py_str__add__);
+    py_bindmagic(tp_str, __mul__, _py_str__mul__);
+    py_bindmagic(tp_str, __rmul__, _py_str__rmul__);
+    py_bindmagic(tp_str, __contains__, _py_str__contains__);
+    py_bindmagic(tp_str, __str__, _py_str__str__);
+    py_bindmagic(tp_str, __repr__, _py_str__repr__);
+    py_bindmagic(tp_str, __iter__, _py_str__iter__);
+    py_bindmagic(tp_str, __getitem__, _py_str__getitem__);
+
+    // comparison operators
+    py_bindmagic(tp_str, __eq__, _py_str__eq__);
+    py_bindmagic(tp_str, __ne__, _py_str__ne__);
+    py_bindmagic(tp_str, __lt__, _py_str__lt__);
+    py_bindmagic(tp_str, __le__, _py_str__le__);
+    py_bindmagic(tp_str, __gt__, _py_str__gt__);
+    py_bindmagic(tp_str, __ge__, _py_str__ge__);
+
+    // named methods
+    py_bindmethod(tp_str, "lower", _py_str__lower);
+    py_bindmethod(tp_str, "upper", _py_str__upper);
+    py_bindmethod(tp_str, "startswith", _py_str__startswith);
+    py_bindmethod(tp_str, "endswith", _py_str__endswith);
+    py_bindmethod(tp_str, "join", _py_str__join);
+    return type;
+}
+
+/// Creates the `bytes` type on the current VM and returns the `py_Type` produced by
+/// `pk_VM__new_type`. No methods are bound here.
+py_Type pk_bytes__register() {
+    // no need to dtor because the memory is controlled by the object
+    return pk_VM__new_type(pk_current_vm, "bytes", tp_object, NULL, false);
+}

+ 2 - 1
src/public/values.c

@@ -82,10 +82,11 @@ void py_newslice(py_Ref out, const py_Ref start, const py_Ref stop, const py_Ref
     py_setslot(out, 2, step);
 }
 
-void py_newobject(py_Ref out, py_Type type, int slots, int udsize) {
+void* py_newobject(py_Ref out, py_Type type, int slots, int udsize) {
     pk_ManagedHeap* heap = &pk_current_vm->heap;
     PyObject* obj = pk_ManagedHeap__gcnew(heap, type, slots, udsize);
     out->type = type;
     out->is_ptr = true;
     out->_obj = obj;
+    return PyObject__userdata(obj);
 }