1 năm trước cách đây · 8319cb2ad4
--- a/include/pocketpy/common/str.h
+++ b/include/pocketpy/common/str.h
@@ -29,8 +29,10 @@ typedef struct c11_bytes{
 
															 } c11_bytes;
														
 
															 int c11_sv__cmp(c11_sv self, c11_sv other);
														
 
															-int c11_sv__cmp2(c11_sv self, const char* other, int size);
														
 
															-int c11_sv__cmp3(c11_sv self, const char* other);
														
 
															+int c11_sv__cmp2(c11_sv self, const char* other);
														
 
															+
														
 
															+bool c11__streq(const char* a, const char* b);
														
 
															+bool c11__sveq(c11_sv a, const char* b);
														
 
															 c11_string* c11_string__new(const char* data);
														
 
															 c11_string* c11_string__new2(const char* data, int size);
														
@@ -66,6 +68,14 @@ int c11__byte_index_to_unicode(const char* data, int n);
 
															 bool c11__is_unicode_Lo_char(int c);
														
 
															 int c11__u8_header(unsigned char c, bool suppress);
														
 
															+/// Result code returned by c11__parse_uint.
															+/// NOTE(review): the meanings below follow the lexer's former parse_uint, from which
															+/// this enum was moved; confirm they still hold for c11__parse_uint.
															+typedef enum IntParsingResult{
														
 
															+    IntParsing_SUCCESS,   // the text was parsed and the value stored in *out
														
 
															+    IntParsing_FAILURE,   // empty text, an invalid digit, or an unsupported base
														
 
															+    IntParsing_OVERFLOW,  // more digits than a 64-bit signed value can hold
														
 
															+} IntParsingResult;
														
 
															+
														
 
															+IntParsingResult c11__parse_uint(c11_sv text, int64_t* out, int base);
														
 
															+
														
 
															 #ifdef __cplusplus
														
 
															 }
														
 
															 #endif
														
--- a/include/pocketpy/compiler/lexer.h
+++ b/include/pocketpy/compiler/lexer.h
@@ -87,14 +87,6 @@ enum Precedence {
 
															     PREC_HIGHEST,
														
 
															 };
														
 
															-typedef enum IntParsingResult{
														
 
															-    IntParsing_SUCCESS,
														
 
															-    IntParsing_FAILURE,
														
 
															-    IntParsing_OVERFLOW,
														
 
															-} IntParsingResult;
														
 
															-
														
 
															-IntParsingResult parse_uint(c11_sv text, int64_t* out, int base);
														
 
															-
														
 
															 typedef struct Error Error;
														
 
															 typedef c11_array pk_TokenArray;
														
--- a/include/pocketpy/pocketpy.h
+++ b/include/pocketpy/pocketpy.h
@@ -311,6 +311,10 @@ py_GlobalRef py_tpobject(py_Type type);
 
															 /// Get the type name.
														
 
															 const char* py_tpname(py_Type type);
														
 
															+/// Check if the object is an instance of the given type.
														
 
															+/// Returns true if the type of `self` equals `type`, and false otherwise.
														
 
															+bool py_checktype(const py_Ref self, py_Type type);
														
 
															+
														
 
															 /// Python favored string formatting.
														
 
															 /// %d: int
														
 
															 /// %i: py_i64 (int64_t)
														
--- a/src/common/str.c
+++ b/src/common/str.c
--- a/src/compiler/lexer.c
+++ b/src/compiler/lexer.c
@@ -246,11 +246,11 @@ static Error* eat_name(pk_Lexer* self){
 
															     c11_sv name = {self->token_start, length};
														
 
															     if(self->src->mode == JSON_MODE) {
														
 
															-        if(c11_sv__cmp3(name, "true") == 0) {
														
 
															+        if(c11__sveq(name, "true")) {
														
 
															             add_token(self, TK_TRUE);
														
 
															-        } else if(c11_sv__cmp3(name, "false") == 0) {
														
 
															+        } else if(c11__sveq(name, "false")) {
														
 
															             add_token(self, TK_FALSE);
														
 
															-        } else if(c11_sv__cmp3(name, "null") == 0) {
														
 
															+        } else if(c11__sveq(name, "null")) {
														
 
															             add_token(self, TK_NONE);
														
 
															         } else {
														
 
															             return SyntaxError("invalid JSON token");
														
@@ -260,12 +260,12 @@ static Error* eat_name(pk_Lexer* self){
 
															     const char** KW_BEGIN = pk_TokenSymbols + TK_FALSE;
														
 
															     int KW_COUNT = TK__COUNT__ - TK_FALSE;
														
 
															-    #define less(a, b) (c11_sv__cmp3(b, a) > 0)
														
 
															+    #define less(a, b) (c11_sv__cmp2(b, a) > 0)
														
 
															     int out;
														
 
															     c11__lower_bound(const char*, KW_BEGIN, KW_COUNT, name, less, &out);
														
 
															     #undef less
														
 
															-    if(out != KW_COUNT && c11_sv__cmp3(name, KW_BEGIN[out]) == 0) {
														
 
															+    if(out != KW_COUNT && c11__sveq(name, KW_BEGIN[out])) {
														
 
															         add_token(self, (TokenIndex)(out + TK_FALSE));
														
 
															     } else {
														
 
															         add_token(self, TK_ID);
														
@@ -372,7 +372,7 @@ static Error* eat_number(pk_Lexer* self){
 
															         }
														
 
															         // try integer
														
 
															         TokenValue value = {.index = TokenValue_I64};
														
 
															-        switch(parse_uint(text, &value._i64, -1)) {
														
 
															+        switch(c11__parse_uint(text, &value._i64, -1)) {
														
 
															             case IntParsing_SUCCESS:
														
 
															                 add_token_with_value(self, TK_NUM, value);
														
 
															                 return NULL;
														
@@ -554,7 +554,7 @@ static Error* from_precompiled(pk_Lexer* self) {
 
															     deserializer.curr += 5;  // skip "pkpy:"
														
 
															     c11_sv version = TokenDeserializer__read_string(&deserializer, '\n');
														
 
															-    if(c11_sv__cmp3(version, PK_VERSION) != 0) {
														
 
															+    if(c11_sv__cmp2(version, PK_VERSION) != 0) {
														
 
															         return SyntaxError("precompiled version mismatch");
														
 
															     }
														
 
															     if(TokenDeserializer__read_uint(&deserializer, '\n') != (int64_t)self->src->mode){
														
@@ -618,97 +618,6 @@ static Error* from_precompiled(pk_Lexer* self) {
 
															     return NULL;
														
 
															 }
														
 
															-IntParsingResult parse_uint(c11_sv text, int64_t* out, int base) {
														
 
															-    *out = 0;
														
 
															-
														
 
															-    c11_sv prefix = {.data = text.data, .size = PK_MIN(2, text.size)};
														
 
															-    if(base == -1) {
														
 
															-        if(c11_sv__cmp3(prefix, "0b") == 0)
														
 
															-            base = 2;
														
 
															-        else if(c11_sv__cmp3(prefix, "0o") == 0)
														
 
															-            base = 8;
														
 
															-        else if(c11_sv__cmp3(prefix, "0x") == 0)
														
 
															-            base = 16;
														
 
															-        else
														
 
															-            base = 10;
														
 
															-    }
														
 
															-
														
 
															-    if(base == 10) {
														
 
															-        // 10-base  12334
														
 
															-        if(text.size == 0) return IntParsing_FAILURE;
														
 
															-        for(int i = 0; i < text.size; i++) {
														
 
															-            char c = text.data[i];
														
 
															-            if(c >= '0' && c <= '9') {
														
 
															-                *out = (*out * 10) + (c - '0');
														
 
															-            } else {
														
 
															-                return IntParsing_FAILURE;
														
 
															-            }
														
 
															-        }
														
 
															-        // "9223372036854775807".__len__() == 19
														
 
															-        if(text.size > 19) return IntParsing_OVERFLOW;
														
 
															-        return IntParsing_SUCCESS;
														
 
															-    } else if(base == 2) {
														
 
															-        // 2-base   0b101010
														
 
															-        if(c11_sv__cmp3(prefix, "0b") == 0) {
														
 
															-            // text.remove_prefix(2);
														
 
															-            text = (c11_sv){text.data + 2, text.size - 2};
														
 
															-        }
														
 
															-        if(text.size == 0) return IntParsing_FAILURE;
														
 
															-        for(int i = 0; i < text.size; i++) {
														
 
															-            char c = text.data[i];
														
 
															-            if(c == '0' || c == '1') {
														
 
															-                *out = (*out << 1) | (c - '0');
														
 
															-            } else {
														
 
															-                return IntParsing_FAILURE;
														
 
															-            }
														
 
															-        }
														
 
															-        // "111111111111111111111111111111111111111111111111111111111111111".__len__() == 63
														
 
															-        if(text.size > 63) return IntParsing_OVERFLOW;
														
 
															-        return IntParsing_SUCCESS;
														
 
															-    } else if(base == 8) {
														
 
															-        // 8-base   0o123
														
 
															-        if(c11_sv__cmp3(prefix, "0o") == 0) {
														
 
															-            // text.remove_prefix(2);
														
 
															-            text = (c11_sv){text.data + 2, text.size - 2};
														
 
															-        }
														
 
															-        if(text.size == 0) return IntParsing_FAILURE;
														
 
															-        for(int i = 0; i < text.size; i++) {
														
 
															-            char c = text.data[i];
														
 
															-            if(c >= '0' && c <= '7') {
														
 
															-                *out = (*out << 3) | (c - '0');
														
 
															-            } else {
														
 
															-                return IntParsing_FAILURE;
														
 
															-            }
														
 
															-        }
														
 
															-        // "777777777777777777777".__len__() == 21
														
 
															-        if(text.size > 21) return IntParsing_OVERFLOW;
														
 
															-        return IntParsing_SUCCESS;
														
 
															-    } else if(base == 16) {
														
 
															-        // 16-base  0x123
														
 
															-        if(c11_sv__cmp3(prefix, "0x") == 0) {
														
 
															-            // text.remove_prefix(2);
														
 
															-            text = (c11_sv){text.data + 2, text.size - 2};
														
 
															-        }
														
 
															-        if(text.size == 0) return IntParsing_FAILURE;
														
 
															-        for(int i = 0; i < text.size; i++) {
														
 
															-            char c = text.data[i];
														
 
															-            if(c >= '0' && c <= '9') {
														
 
															-                *out = (*out << 4) | (c - '0');
														
 
															-            } else if(c >= 'a' && c <= 'f') {
														
 
															-                *out = (*out << 4) | (c - 'a' + 10);
														
 
															-            } else if(c >= 'A' && c <= 'F') {
														
 
															-                *out = (*out << 4) | (c - 'A' + 10);
														
 
															-            } else {
														
 
															-                return IntParsing_FAILURE;
														
 
															-            }
														
 
															-        }
														
 
															-        // "7fffffffffffffff".__len__() == 16
														
 
															-        if(text.size > 16) return IntParsing_OVERFLOW;
														
 
															-        return IntParsing_SUCCESS;
														
 
															-    }
														
 
															-    return IntParsing_FAILURE;
														
 
															-}
														
 
															-
														
 
															 Error* pk_Lexer__process(pk_SourceData_ src, pk_TokenArray* out_tokens){
														
 
															     pk_Lexer lexer;
														
 
															     pk_Lexer__ctor(&lexer, src);
														
--- a/src/interpreter/py_number.c
+++ b/src/interpreter/py_number.c
@@ -1,4 +1,5 @@
 
															 #include "pocketpy/interpreter/vm.h"
														
 
															+#include "pocketpy/pocketpy.h"
														
 
															 #include <math.h>
														
@@ -43,6 +44,8 @@ DEF_NUM_BINARY_OP(__ge__, >=, py_newbool, py_newbool)
 
															 #undef DEF_NUM_BINARY_OP
														
 
															+// File-local stand-in for raising a ValueError: it ignores `fmt` and every variadic
															+// argument and always returns false, so callers can write `return ValueError(...)`.
															+// NOTE(review): no exception is recorded here; this looks like a temporary stub.
															+static bool ValueError(const char* fmt, ...) { return false; }
														
 
															+
														
 
															 static bool _py_int__neg__(int argc, py_Ref argv) {
														
 
															     py_checkargc(1);
														
 
															     int64_t val = py_toint(&argv[0]);
														
@@ -182,6 +185,161 @@ DEF_INT_BITWISE_OP(__rshift__, >>)
 
															 #undef DEF_INT_BITWISE_OP
														
 
															+// int.__repr__: writes the base-10 text of argv[0] into py_retval().
															+static bool _py_int__repr__(int argc, py_Ref argv) {
															+    py_checkargc(1);
															+    // A 64-bit value needs at most 20 characters plus the terminator.
															+    char text[32];
															+    const long long number = (long long)py_toint(&argv[0]);
															+    const int length = snprintf(text, sizeof(text), "%lld", number);
															+    py_newstrn(py_retval(), text, length);
															+    return true;
															+}
														
 
															+
														
 
															+// float.__repr__: writes argv[0] formatted with "%f" (fixed notation, six decimals)
															+// into py_retval().
															+static bool _py_float__repr__(int argc, py_Ref argv) {
															+    py_checkargc(1);
															+    double val = py_tofloat(&argv[0]);
															+    // "%f" prints every integer digit: the largest double needs 309 digits plus a
															+    // sign, '.', six decimals and the terminator, so 32 bytes is not enough.
															+    char buf[352];
															+    int size = snprintf(buf, sizeof(buf), "%f", val);
															+    // snprintf returns the untruncated length (negative on error); clamp it so that
															+    // py_newstrn never reads more bytes than were actually written to buf.
															+    if(size < 0) size = 0;
															+    if(size >= (int)sizeof(buf)) size = (int)sizeof(buf) - 1;
															+    py_newstrn(py_retval(), buf, size);
															+    return true;
															+}
														
 
															+
														
 
															+// Views one value as a py_i64, a py_f64, or a pair of 32-bit words.
															+// NOTE(review): assumes py_i64 and py_f64 are both 8 bytes wide; confirm.
															+union c11_8bytes {
															+    py_i64 _i64;
															+    py_f64 _f64;
															+
															+    // This must be a struct: inside a union `upper` and `lower` would alias the same
															+    // four bytes, leaving the other half of the value out of reach.
															+    struct {
															+        uint32_t upper;
															+        uint32_t lower;
															+    } bits;
															+};
														
 
															+
														
 
															+// Mixes an 8-byte value by multiplying its `bits.upper` and `bits.lower` views by a
															+// fixed odd 32-bit constant (wrapping modulo 2^32) and returns the result as py_i64.
															+static py_i64 c11_8bytes__hash(union c11_8bytes u) {
															+    // Multiplier taken from:
															+    // https://stackoverflow.com/questions/664014/what-integer-hash-function-are-good-that-accepts-an-integer-hash-key
															+    const uint32_t multiplier = 2654435761;
															+    u.bits.upper = u.bits.upper * multiplier;
															+    u.bits.lower = u.bits.lower * multiplier;
															+    return u._i64;
															+}
														
 
															+
														
 
															+// int.__hash__: hashes the 64-bit value of argv[0] with c11_8bytes__hash and stores
															+// the result in py_retval() as an int.
															+static bool _py_int__hash__(int argc, py_Ref argv) {
															+    py_checkargc(1);
															+    union c11_8bytes raw;
															+    raw._i64 = py_toint(&argv[0]);
															+    py_newint(py_retval(), c11_8bytes__hash(raw));
															+    return true;
															+}
														
 
															+
														
 
															+// float.__hash__: hashes the stored representation of argv[0] with c11_8bytes__hash
															+// and stores the result in py_retval() as an int.
															+// NOTE(review): the hash depends on the bit pattern, so floats that compare equal but
															+// are encoded differently (e.g. 0.0 and -0.0) may hash differently; confirm intent.
															+static bool _py_float__hash__(int argc, py_Ref argv) {
															+    py_checkargc(1);
															+    union c11_8bytes raw;
															+    raw._f64 = py_tofloat(&argv[0]);
															+    py_newint(py_retval(), c11_8bytes__hash(raw));
															+    return true;
															+}
														
 
															+
														
 
															+// int.__new__: implements int(), int(x) and int(text, base).
															+// argc counts one leading slot (argv[0]) in addition to the user-supplied arguments,
															+// hence the `1 + n` comparisons. On success the new int is stored in py_retval() and
															+// true is returned; every failure path returns false.
															+static bool _py_int__new__(int argc, py_Ref argv) {
															+    if(argc == 1 + 0) {
															+        // int() == 0
															+        py_newint(py_retval(), 0);
															+        return true;
															+    }
															+    // 1 arg
															+    if(argc == 1 + 1) {
															+        switch(argv[1].type) {
															+            case tp_float: {
															+                // int(1.1) == 1
															+                double f = py_tofloat(&argv[1]);
															+                // Converting NaN, an infinity or a value outside the int64_t range is
															+                // undefined behaviour in C, so reject those first. 2^63 is exactly
															+                // representable as a double; NaN fails both comparisons.
															+                if(!(f >= -9223372036854775808.0 && f < 9223372036854775808.0)) {
															+                    return ValueError("cannot convert float to int: value out of range");
															+                }
															+                py_newint(py_retval(), (int64_t)f);
															+                return true;
															+            }
															+            case tp_int: {
															+                // int(1) == 1
															+                *py_retval() = argv[1];
															+                return true;
															+            }
															+            case tp_bool: {
															+                // int(True) == 1
															+                py_newint(py_retval(), (int64_t)py_tobool(&argv[1]));
															+                return true;
															+            }
															+            case tp_str: break;  // leave to the next block
															+            default: return TypeError("invalid arguments for int()");
															+        }
															+    }
															+    // more than 2 args -> error
															+    if(argc > 1 + 2) return TypeError("int() takes at most 2 arguments");
															+    // 1 or 2 args with str
															+    int base = 10;
															+    if(argc == 1 + 2) {
															+        if(!py_checktype(py_arg(2), tp_int)) return false;
															+        py_i64 wide_base = py_toint(py_arg(2));
															+        base = (int)wide_base;
															+        // A 64-bit base must not wrap around into a value that looks valid.
															+        if(base != wide_base) return ValueError("int() base is out of range");
															+    }
															+
															+    if(!py_checktype(py_arg(1), tp_str)) return false;
															+    int size;
															+    const char* data = py_tostrn(py_arg(1), &size);
															+    // `digits` walks past an optional sign; `data` keeps the full text for messages.
															+    const char* digits = data;
															+    bool negative = false;
															+    if(size && (digits[0] == '+' || digits[0] == '-')) {
															+        negative = digits[0] == '-';
															+        digits++;
															+        size--;
															+    }
															+    py_i64 val;
															+    if(c11__parse_uint((c11_sv){digits, size}, &val, base) != IntParsing_SUCCESS) {
															+        // Report the literal exactly as given, including its sign.
															+        return ValueError("invalid literal for int() with base %d: %q", base, data);
															+    }
															+    py_newint(py_retval(), negative ? -val : val);
															+    return true;
															+}
														
 
															+
														
 
															+// float.__new__: implements float() and float(x) for int, float, bool and str.
															+// argc counts one leading slot (argv[0]) in addition to the user-supplied arguments.
															+// On success the new float is stored in py_retval() and true is returned; every
															+// failure path returns false.
															+static bool _py_float__new__(int argc, py_Ref argv) {
															+    if(argc == 1 + 0) {
															+        // float() == 0.0
															+        py_newfloat(py_retval(), 0.0);
															+        return true;
															+    }
															+    if(argc > 1 + 1) return TypeError("float() takes at most 1 argument");
															+    // 1 arg
															+    switch(argv[1].type) {
															+        case tp_int: {
															+            // float(1) == 1.0
															+            py_newfloat(py_retval(), py_toint(&argv[1]));
															+            return true;
															+        }
															+        case tp_float: {
															+            // float(1.1) == 1.1
															+            *py_retval() = argv[1];
															+            return true;
															+        }
															+        case tp_bool: {
															+            // float(True) == 1.0
															+            py_newfloat(py_retval(), py_tobool(&argv[1]));
															+            return true;
															+        }
															+        case tp_str: break;  // leave to the next block
															+        default: return TypeError("invalid arguments for float()");
															+    }
															+    // str to float
															+    int size;
															+    const char* data = py_tostrn(py_arg(1), &size);
															+
															+    if(c11__streq(data, "inf")){
															+        py_newfloat(py_retval(), INFINITY);
															+        return true;
															+    }
															+    if(c11__streq(data, "-inf")){
															+        py_newfloat(py_retval(), -INFINITY);
															+        return true;
															+    }
															+
															+    char* p_end;
															+    py_f64 float_out = strtod(data, &p_end);
															+    // strtod leaves p_end == data when it converts nothing. Without that check an
															+    // empty string (size == 0) would pass the end-of-text test and become 0.0.
															+    // The second test rejects trailing characters, including an embedded NUL.
															+    if(p_end == data || p_end != data + size){
															+        return ValueError("invalid literal for float(): %q", data);
															+    }
															+    py_newfloat(py_retval(), float_out);
															+    return true;
															+}
														
 
															+
														
 
															 void pk_VM__init_builtins(pk_VM* self) {
														
 
															     /****** tp_int & tp_float ******/
														
 
															     py_bindmagic(tp_int, __add__, _py_int__add__);
														
@@ -208,7 +366,17 @@ void pk_VM__init_builtins(pk_VM* self) {
 
															     py_bindmagic(tp_int, __neg__, _py_int__neg__);
														
 
															     py_bindmagic(tp_float, __neg__, _py_float__neg__);
														
 
															-    // TODO: __repr__, __new__, __hash__
														
 
															+    // __repr__
														
 
															+    py_bindmagic(tp_int, __repr__, _py_int__repr__);
														
 
															+    py_bindmagic(tp_float, __repr__, _py_float__repr__);
														
 
															+
														
 
															+    // __hash__
														
 
															+    py_bindmagic(tp_int, __hash__, _py_int__hash__);
														
 
															+    py_bindmagic(tp_float, __hash__, _py_float__hash__);
														
 
															+
														
 
															+    // __new__
														
 
															+    py_bindmagic(tp_int, __new__, _py_int__new__);
														
 
															+    py_bindmagic(tp_float, __new__, _py_float__new__);
														
 
															     // __truediv__
														
 
															     py_bindmagic(tp_int, __truediv__, _py_int__truediv__);
														
--- a/src/public/cast.c
+++ b/src/public/cast.c
@@ -40,3 +40,11 @@ void* py_touserdata(const py_Ref self) {
 
															 }
														
 
															 bool py_istype(const py_Ref self, py_Type type) { return self->type == type; }
														
 
															+
														
 
															+// Returns true when the type of `self` is exactly `type`, and false otherwise.
															+// TODO: raise a TypeError on mismatch, as sketched by the disabled call
															+// py_raise(PyExc_TypeError, "expected %s, got %s", py_typename(type), py_typename(self->type));
															+bool py_checktype(const py_Ref self, py_Type type) {
															+    return self->type == type;
															+}