blueloveTH 1 an în urmă
părinte
comite
c4bb16e390

+ 25 - 0
include/pocketpy/compiler/lexer.h

@@ -0,0 +1,25 @@
+#pragma once
+
+#include <stdbool.h>
+#include "pocketpy/common/str.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct pkpy_TokenDeserializer {  // cursor state used to read delimiter-separated fields from a text buffer
+    const char* curr;  // current read position; the match/read functions advance it
+    const char* source;  // the pointer handed to the constructor (start of the input)
+} pkpy_TokenDeserializer;
+
+void pkpy_TokenDeserializer__ctor(pkpy_TokenDeserializer* self, const char* source);  // set both `curr` and `source` to `source`
+bool pkpy_TokenDeserializer__match_char(pkpy_TokenDeserializer* self, char c);  // consume `c` if it is the next character; returns whether it was consumed
+c11_string pkpy_TokenDeserializer__read_string(pkpy_TokenDeserializer* self, char c);  // slice up to the next `c`; the delimiter is skipped
+pkpy_Str pkpy_TokenDeserializer__read_string_from_hex(pkpy_TokenDeserializer* self, char c);  // like read_string, then decodes pairs of lowercase hex digits into bytes
+int pkpy_TokenDeserializer__read_count(pkpy_TokenDeserializer* self);  // expects '=' followed by a decimal number and '\n'
+int64_t pkpy_TokenDeserializer__read_uint(pkpy_TokenDeserializer* self, char c);  // decimal number terminated by `c`; the delimiter is skipped
+double pkpy_TokenDeserializer__read_float(pkpy_TokenDeserializer* self, char c);  // field terminated by `c`, converted with strtod
+
+#ifdef __cplusplus
+}
+#endif

+ 0 - 23
include/pocketpy/compiler/lexer.hpp

@@ -146,27 +146,4 @@ enum class IntParsingResult {
 
 IntParsingResult parse_uint(std::string_view text, i64* out, int base) noexcept;
 
-struct TokenDeserializer {
-    const char* curr;
-    const char* source;
-
-    TokenDeserializer(const char* source) noexcept: curr(source), source(source){}
-
-    char read_char() noexcept{ return *curr++; }
-
-    bool match_char(char c) noexcept{
-        if(*curr == c) {
-            curr++;
-            return true;
-        }
-        return false;
-    }
-
-    std::string_view read_string(char c) noexcept;
-    Str read_string_from_hex(char c) noexcept;
-    int read_count() noexcept;
-    i64 read_uint(char c) noexcept;
-    f64 read_float(char c) noexcept;
-};
-
 }  // namespace pkpy

+ 4 - 0
src/common/sourcedata.c

@@ -33,6 +33,10 @@ void pkpy_SourceData__dtor(struct pkpy_SourceData* self) {
     pkpy_Str__dtor(&self->filename);
     pkpy_Str__dtor(&self->source);
     c11_vector__dtor(&self->line_starts);
+
+    for(int i=0; i<self->_precompiled_tokens.count; i++){
+        pkpy_Str__dtor(c11__at(pkpy_Str, &self->_precompiled_tokens, i));
+    }
     c11_vector__dtor(&self->_precompiled_tokens);
 }
 

+ 83 - 0
src/compiler/lexer.c

@@ -0,0 +1,83 @@
+#include "pocketpy/common/config.h"
+#include "pocketpy/common/str.h"
+#include "pocketpy/common/smallmap.h"
+#include "pocketpy/compiler/lexer.h"
+
+void pkpy_TokenDeserializer__ctor(pkpy_TokenDeserializer* self, const char* source){  // initialise `self` to read from `source`
+    self->curr = source;  // reading starts at the first character
+    self->source = source;  // remember where the input begins
+}
+
+bool pkpy_TokenDeserializer__match_char(pkpy_TokenDeserializer* self, char c){  // consume `c` if it is the next character
+    if(*self->curr == c) {
+        self->curr++;  // step past the matched character
+        return true;
+    }
+    return false;  // no match: the cursor is left where it was
+}
+
+c11_string pkpy_TokenDeserializer__read_string(pkpy_TokenDeserializer* self, char c){  // return the slice from the cursor up to, but not including, the next `c`
+    const char* start = self->curr;
+    while(*self->curr != c)  // no bounds or NUL check: `c` must occur ahead of the cursor
+        self->curr++;
+    c11_string retval = {start, self->curr - start};  // pointer + length into the input; nothing is copied here
+    self->curr++;  // skip the delimiter
+    return retval;
+}
+
+pkpy_Str pkpy_TokenDeserializer__read_string_from_hex(pkpy_TokenDeserializer* self, char c){  // read a field ending at `c` and decode it from lowercase hex into a pkpy_Str
+    c11_string sv = pkpy_TokenDeserializer__read_string(self, c);
+    const char* s = sv.data;
+    char* buffer = (char*)malloc(sv.size / 2 + 1);  // one byte per two hex digits, plus a terminating 0; the malloc result is not checked
+    for(int i = 0; i < sv.size; i += 2) {  // NOTE(review): for an odd sv.size the last pass reads s[i + 1] one past the slice
+        char c = 0;  // decoded byte; shadows the delimiter parameter `c`
+        if(s[i] >= '0' && s[i] <= '9')
+            c += s[i] - '0';  // first digit of the pair (becomes the high nibble)
+        else if(s[i] >= 'a' && s[i] <= 'f')
+            c += s[i] - 'a' + 10;
+        else
+            assert(false);  // only 0-9 and a-f are accepted; uppercase digits are rejected
+        c <<= 4;  // move the first digit into the high nibble
+        if(s[i + 1] >= '0' && s[i + 1] <= '9')
+            c += s[i + 1] - '0';  // second digit of the pair (low nibble)
+        else if(s[i + 1] >= 'a' && s[i + 1] <= 'f')
+            c += s[i + 1] - 'a' + 10;
+        else
+            assert(false);
+        buffer[i / 2] = c;
+    }
+    buffer[sv.size / 2] = 0;  // terminate the decoded bytes
+    return (pkpy_Str){  // the malloc'd buffer is handed to the returned string
+        .size = sv.size / 2,
+        .is_ascii = c11__isascii(buffer, sv.size / 2),
+        .is_sso = false,
+        ._ptr = buffer  // presumably released later by pkpy_Str__dtor — TODO confirm
+    };
+}
+
+int pkpy_TokenDeserializer__read_count(pkpy_TokenDeserializer* self){  // read a "=<digits>\n" field and return the number
+    assert(*self->curr == '=');  // the field must start with '='
+    self->curr++;  // step past '='
+    return pkpy_TokenDeserializer__read_uint(self, '\n');  // the int64_t result is converted to int on return
+}
+
+int64_t pkpy_TokenDeserializer__read_uint(pkpy_TokenDeserializer* self, char c){  // accumulate a base-10 number from the characters before delimiter `c`
+    int64_t out = 0;
+    while(*self->curr != c) {  // characters are not checked to be digits, and there is no overflow or end-of-input check
+        out = out * 10 + (*self->curr - '0');
+        self->curr++;
+    }
+    self->curr++;  // skip the delimiter
+    return out;  // 0 when the delimiter is the very next character
+}
+
+double pkpy_TokenDeserializer__read_float(pkpy_TokenDeserializer* self, char c){  // read the field ending at `c` and convert it with strtod
+    c11_string sv = pkpy_TokenDeserializer__read_string(self, c);  // also moves the cursor past the delimiter
+    pkpy_Str nullterm;  // presumably a NUL-terminated copy of the slice for strtod — confirm against pkpy_Str__ctor2
+    pkpy_Str__ctor2(&nullterm, sv.data, sv.size);
+    char* end;  // set by strtod to the first unparsed character inside nullterm's data
+    double retval = strtod(pkpy_Str__data(&nullterm), &end);
+    assert(*end == 0);  // the whole field must have been consumed; checked while nullterm is still alive
+    pkpy_Str__dtor(&nullterm);  // destroy only after the last read through `end`, which points into this string
+    return retval;
+}

+ 38 - 76
src/compiler/lexer.cpp

@@ -2,6 +2,7 @@
 #include "pocketpy/common/config.h"
 #include "pocketpy/common/str.h"
 #include "pocketpy/common/smallmap.h"
+#include "pocketpy/compiler/lexer.h"
 
 #include <cstdarg>
 
@@ -542,54 +543,69 @@ Error* Lexer::run() noexcept{
 }
 
 Error* Lexer::from_precompiled() noexcept{
-    TokenDeserializer deserializer(pkpy_Str__data(&src->source));
+    pkpy_TokenDeserializer deserializer;
+    pkpy_TokenDeserializer__ctor(&deserializer, pkpy_Str__data(&src->source));
+
     deserializer.curr += 5;  // skip "pkpy:"
-    std::string_view version = deserializer.read_string('\n');
+    c11_string version = pkpy_TokenDeserializer__read_string(&deserializer, '\n');
 
-    if(version != PK_VERSION){
+    if(c11_string__cmp3(version, PK_VERSION) != 0) {
         return SyntaxError("precompiled version mismatch");
     }
-    if(deserializer.read_uint('\n') != (i64)src->mode){
+    if(pkpy_TokenDeserializer__read_uint(&deserializer, '\n') != (i64)src->mode){
         return SyntaxError("precompiled mode mismatch");
     }
 
-    int count = deserializer.read_count();
-    auto precompiled_tokens = &src->_precompiled_tokens;
+    int count = pkpy_TokenDeserializer__read_count(&deserializer);
+    c11_vector* precompiled_tokens = &src->_precompiled_tokens;
     for(int i = 0; i < count; i++) {
-        c11_vector__push(Str, precompiled_tokens, Str(deserializer.read_string('\n')));
+        c11_string item = pkpy_TokenDeserializer__read_string(&deserializer, '\n');
+        pkpy_Str copied_item;
+        pkpy_Str__ctor2(&copied_item, item.data, item.size);
+        c11_vector__push(pkpy_Str, precompiled_tokens, copied_item);
     }
 
-    count = deserializer.read_count();
+    count = pkpy_TokenDeserializer__read_count(&deserializer);
     for(int i = 0; i < count; i++) {
         Token t;
-        t.type = (unsigned char)deserializer.read_uint(',');
+        t.type = (unsigned char)pkpy_TokenDeserializer__read_uint(&deserializer, ',');
         if(is_raw_string_used(t.type)) {
-            i64 index = deserializer.read_uint(',');
-            t.start = c11__getitem(Str, precompiled_tokens, index).c_str();
-            t.length = c11__getitem(Str, precompiled_tokens, index).size;
+            i64 index = pkpy_TokenDeserializer__read_uint(&deserializer, ',');
+            pkpy_Str* p = c11__at(pkpy_Str, precompiled_tokens, index);
+            t.start = pkpy_Str__data(p);
+            t.length = c11__getitem(pkpy_Str, precompiled_tokens, index).size;
         } else {
-            t.start = nullptr;
+            t.start = NULL;
             t.length = 0;
         }
 
-        if(deserializer.match_char(',')) {
+        if(pkpy_TokenDeserializer__match_char(&deserializer, ',')) {
             t.line = nexts.back().line;
         } else {
-            t.line = (int)deserializer.read_uint(',');
+            t.line = (int)pkpy_TokenDeserializer__read_uint(&deserializer, ',');
         }
 
-        if(deserializer.match_char(',')) {
+        if(pkpy_TokenDeserializer__match_char(&deserializer, ',')) {
             t.brackets_level = nexts.back().brackets_level;
         } else {
-            t.brackets_level = (int)deserializer.read_uint(',');
+            t.brackets_level = (int)pkpy_TokenDeserializer__read_uint(&deserializer, ',');
         }
 
-        char type = deserializer.read_char();
+        char type = (*deserializer.curr++);      // read_char
         switch(type) {
-            case 'I': t.value = deserializer.read_uint('\n'); break;
-            case 'F': t.value = deserializer.read_float('\n'); break;
-            case 'S': t.value = deserializer.read_string_from_hex('\n'); break;
-            default: t.value = {}; break;
+            case 'I':
+                t.value = pkpy_TokenDeserializer__read_uint(&deserializer, '\n');
+                break;
+            case 'F':
+                t.value = pkpy_TokenDeserializer__read_float(&deserializer, '\n');
+                break;
+            case 'S': {
+                pkpy_Str res = pkpy_TokenDeserializer__read_string_from_hex(&deserializer, '\n');
+                t.value = Str(std::move(res));
+            } break;
+            default:
+                t.value = {};
+                break;
         }
         nexts.push_back(t);
     }
@@ -665,60 +681,6 @@ Error* Lexer::precompile(Str* out) noexcept{
     return NULL;
 }
 
-std::string_view TokenDeserializer::read_string(char c) noexcept{
-    const char* start = curr;
-    while(*curr != c)
-        curr++;
-    std::string_view retval(start, curr - start);
-    curr++;  // skip the delimiter
-    return retval;
-}
-
-Str TokenDeserializer::read_string_from_hex(char c) noexcept{
-    std::string_view s = read_string(c);
-    char* buffer = (char*)std::malloc(s.size() / 2 + 1);
-    for(int i = 0; i < s.size(); i += 2) {
-        char c = 0;
-        if(s[i] >= '0' && s[i] <= '9')
-            c += s[i] - '0';
-        else if(s[i] >= 'a' && s[i] <= 'f')
-            c += s[i] - 'a' + 10;
-        else
-            assert(false);
-        c <<= 4;
-        if(s[i + 1] >= '0' && s[i + 1] <= '9')
-            c += s[i + 1] - '0';
-        else if(s[i + 1] >= 'a' && s[i + 1] <= 'f')
-            c += s[i + 1] - 'a' + 10;
-        else
-            assert(false);
-        buffer[i / 2] = c;
-    }
-    buffer[s.size() / 2] = 0;
-    return pair<char*, int>(buffer, s.size() / 2);
-}
-
-int TokenDeserializer::read_count() noexcept{
-    assert(*curr == '=');
-    curr++;
-    return read_uint('\n');
-}
-
-i64 TokenDeserializer::read_uint(char c) noexcept{
-    i64 out = 0;
-    while(*curr != c) {
-        out = out * 10 + (*curr - '0');
-        curr++;
-    }
-    curr++;  // skip the delimiter
-    return out;
-}
-
-f64 TokenDeserializer::read_float(char c) noexcept{
-    std::string_view sv = read_string(c);
-    return std::stod(std::string(sv));
-}
-
 IntParsingResult parse_uint(std::string_view text, i64* out, int base) noexcept{
     *out = 0;