blueloveTH преди 1 година
родител
ревизия
b94b535de8
променени са 4 файла, в които са добавени 40 реда и са изтрити 229 реда
  1. 0 1
      include/pocketpy/compiler/lexer.h
  2. 0 2
      include/pocketpy/objects/sourcedata.h
  3. 32 42
      src/common/sourcedata.c
  4. 8 184
      src/compiler/lexer.c

+ 0 - 1
include/pocketpy/compiler/lexer.h

@@ -87,7 +87,6 @@ enum Precedence {
 typedef c11_array TokenArray;
 
 Error* Lexer__process(SourceData_ src, TokenArray* out_tokens);
-Error* Lexer__process_and_dump(SourceData_ src, c11_string** out_string);
 void TokenArray__dtor(TokenArray* self);
 
 #define Token__sv(self) (c11_sv){(self)->start, (self)->length}

+ 0 - 2
include/pocketpy/objects/sourcedata.h

@@ -9,14 +9,12 @@
 struct SourceData {
     RefCounted rc;
     enum py_CompileMode mode;
-    bool is_precompiled;
     bool is_dynamic;  // for exec() and eval()
 
     c11_string* filename;
     c11_string* source;
 
     c11_vector /*T=const char* */ line_starts;
-    c11_vector /*T=c11_string* */ _precompiled_tokens;
 };
 
 typedef struct SourceData* SourceData_;

+ 32 - 42
src/common/sourcedata.c

@@ -5,14 +5,13 @@
 #include <string.h>
 
 static void SourceData__ctor(struct SourceData* self,
-                                const char* source,
-                                const char* filename,
-                                enum py_CompileMode mode,
-                                bool is_dynamic) {
+                             const char* source,
+                             const char* filename,
+                             enum py_CompileMode mode,
+                             bool is_dynamic) {
     self->filename = c11_string__new(filename);
     self->mode = mode;
     c11_vector__ctor(&self->line_starts, sizeof(const char*));
-    c11_vector__ctor(&self->_precompiled_tokens, sizeof(c11_string*));
 
     // Skip utf8 BOM if there is any.
     if(strncmp(source, "\xEF\xBB\xBF", 3) == 0) source += 3;
@@ -26,7 +25,6 @@ static void SourceData__ctor(struct SourceData* self,
         source++;
     }
     self->source = c11_sbuf__submit(&ss);
-    self->is_precompiled = (strncmp(source, "pkpy:", 5) == 0);
     self->is_dynamic = is_dynamic;
     c11_vector__push(const char*, &self->line_starts, self->source->data);
 }
@@ -34,19 +32,13 @@ static void SourceData__ctor(struct SourceData* self,
 static void SourceData__dtor(struct SourceData* self) {
     c11_string__delete(self->filename);
     c11_string__delete(self->source);
-
     c11_vector__dtor(&self->line_starts);
-
-    for(int i = 0; i < self->_precompiled_tokens.count; i++) {
-        c11_string__delete(c11__getitem(c11_string*, &self->_precompiled_tokens, i));
-    }
-    c11_vector__dtor(&self->_precompiled_tokens);
 }
 
 SourceData_ SourceData__rcnew(const char* source,
-                                    const char* filename,
-                                    enum py_CompileMode mode,
-                                    bool is_dynamic) {
+                              const char* filename,
+                              enum py_CompileMode mode,
+                              bool is_dynamic) {
     SourceData_ self = malloc(sizeof(struct SourceData));
     SourceData__ctor(self, source, filename, mode, is_dynamic);
     self->rc.count = 1;
@@ -55,10 +47,10 @@ SourceData_ SourceData__rcnew(const char* source,
 }
 
 bool SourceData__get_line(const struct SourceData* self,
-                             int lineno,
-                             const char** st,
-                             const char** ed) {
-    if(self->is_precompiled || lineno == -1) { return false; }
+                          int lineno,
+                          const char** st,
+                          const char** ed) {
+    if(lineno < 0) return false;
     lineno -= 1;
     if(lineno < 0) lineno = 0;
     const char* _start = c11__getitem(const char*, &self->line_starts, lineno);
@@ -72,10 +64,10 @@ bool SourceData__get_line(const struct SourceData* self,
 }
 
 void SourceData__snapshot(const struct SourceData* self,
-                             c11_sbuf* ss,
-                             int lineno,
-                             const char* cursor,
-                             const char* name) {
+                          c11_sbuf* ss,
+                          int lineno,
+                          const char* cursor,
+                          const char* name) {
     pk_sprintf(ss, "  File \"%s\", line %d", self->filename->data, lineno);
 
     if(name && *name) {
@@ -83,26 +75,24 @@ void SourceData__snapshot(const struct SourceData* self,
         c11_sbuf__write_cstr(ss, name);
     }
 
-    if(!self->is_precompiled) {
-        c11_sbuf__write_char(ss, '\n');
-        const char *st = NULL, *ed;
-        if(SourceData__get_line(self, lineno, &st, &ed)) {
-            while(st < ed && isblank(*st))
-                ++st;
-            if(st < ed) {
-                c11_sbuf__write_cstr(ss, "    ");
-                c11_sbuf__write_cstrn(ss, st, ed - st);
-                if(cursor && st <= cursor && cursor <= ed) {
-                    c11_sbuf__write_cstr(ss, "\n    ");
-                    for(int i = 0; i < (cursor - st); ++i)
-                        c11_sbuf__write_char(ss, ' ');
-                    c11_sbuf__write_cstr(ss, "^");
-                }
-            } else {
-                st = NULL;
+    c11_sbuf__write_char(ss, '\n');
+    const char *st = NULL, *ed;
+    if(SourceData__get_line(self, lineno, &st, &ed)) {
+        while(st < ed && isblank(*st))
+            ++st;
+        if(st < ed) {
+            c11_sbuf__write_cstr(ss, "    ");
+            c11_sbuf__write_cstrn(ss, st, ed - st);
+            if(cursor && st <= cursor && cursor <= ed) {
+                c11_sbuf__write_cstr(ss, "\n    ");
+                for(int i = 0; i < (cursor - st); ++i)
+                    c11_sbuf__write_char(ss, ' ');
+                c11_sbuf__write_cstr(ss, "^");
             }
+        } else {
+            st = NULL;
         }
-
-        if(!st) { c11_sbuf__write_cstr(ss, "    <?>"); }
     }
+
+    if(!st) { c11_sbuf__write_cstr(ss, "    <?>"); }
 }

+ 8 - 184
src/compiler/lexer.c

@@ -267,7 +267,11 @@ static Error* eat_name(Lexer* self) {
     return NULL;
 }
 
-static Error* eat_string_until(Lexer* self, char quote, bool raw, c11_string** out) {
+enum StringType { NORMAL_STRING, RAW_STRING, F_STRING, NORMAL_BYTES };
+
+static Error* eat_string(Lexer* self, char quote, enum StringType type) {
+    bool raw = type == RAW_STRING;
+
     // previous char is quote
     bool quote3 = match_n_chars(self, 2, quote);
     c11_sbuf buff;
@@ -313,17 +317,9 @@ static Error* eat_string_until(Lexer* self, char quote, bool raw, c11_string** o
             c11_sbuf__write_char(&buff, c);
         }
     }
-    *out = c11_sbuf__submit(&buff);
-    return NULL;
-}
-
-enum StringType { NORMAL_STRING, RAW_STRING, F_STRING, NORMAL_BYTES };
 
-static Error* eat_string(Lexer* self, char quote, enum StringType type) {
-    c11_string* s;
-    Error* err = eat_string_until(self, quote, type == RAW_STRING, &s);
-    if(err) return err;
-    TokenValue value = {TokenValue_STR, ._str = s};
+    c11_string* res = c11_sbuf__submit(&buff);
+    TokenValue value = {TokenValue_STR, ._str = res};
     if(type == F_STRING) {
         add_token_with_value(self, TK_FSTR, value);
     } else if(type == NORMAL_BYTES) {
@@ -468,8 +464,7 @@ static Error* lex_one_token(Lexer* self, bool* eof) {
                 if(matchchar(self, '=')) {
                     add_token(self, TK_NE);
                 } else {
-                    Error* err = SyntaxError(self, "expected '=' after '!'");
-                    if(err) return err;
+                    return SyntaxError(self, "expected '=' after '!'");
                 }
                 break;
             case '*':
@@ -523,85 +518,10 @@ static Error* lex_one_token(Lexer* self, bool* eof) {
     return NULL;
 }
 
-static Error* from_precompiled(Lexer* self) {
-    TokenDeserializer deserializer;
-    TokenDeserializer__ctor(&deserializer, self->src->source->data);
-
-    deserializer.curr += 5;  // skip "pkpy:"
-    c11_sv version = TokenDeserializer__read_string(&deserializer, '\n');
-
-    if(c11_sv__cmp2(version, PK_VERSION) != 0) {
-        return SyntaxError(self, "precompiled version mismatch");
-    }
-    if(TokenDeserializer__read_uint(&deserializer, '\n') != (int64_t)self->src->mode) {
-        return SyntaxError(self, "precompiled mode mismatch");
-    }
-
-    int count = TokenDeserializer__read_count(&deserializer);
-    c11_vector* precompiled_tokens = &self->src->_precompiled_tokens;
-    for(int i = 0; i < count; i++) {
-        c11_sv item = TokenDeserializer__read_string(&deserializer, '\n');
-        c11_string* copied_item = c11_string__new2(item.data, item.size);
-        c11_vector__push(c11_string*, precompiled_tokens, copied_item);
-    }
-
-    count = TokenDeserializer__read_count(&deserializer);
-    for(int i = 0; i < count; i++) {
-        Token t;
-        t.type = (TokenIndex)TokenDeserializer__read_uint(&deserializer, ',');
-        if(is_raw_string_used(t.type)) {
-            int64_t index = TokenDeserializer__read_uint(&deserializer, ',');
-            c11_string* p = c11__getitem(c11_string*, precompiled_tokens, index);
-            t.start = p->data;
-            t.length = p->size;
-        } else {
-            t.start = NULL;
-            t.length = 0;
-        }
-
-        if(TokenDeserializer__match_char(&deserializer, ',')) {
-            t.line = c11_vector__back(Token, &self->nexts).line;
-        } else {
-            t.line = (int)TokenDeserializer__read_uint(&deserializer, ',');
-        }
-
-        if(TokenDeserializer__match_char(&deserializer, ',')) {
-            t.brackets_level = c11_vector__back(Token, &self->nexts).brackets_level;
-        } else {
-            t.brackets_level = (int)TokenDeserializer__read_uint(&deserializer, ',');
-        }
-
-        char type = (*deserializer.curr++);  // read_char
-        switch(type) {
-            case 'I': {
-                int64_t res = TokenDeserializer__read_uint(&deserializer, '\n');
-                t.value = (TokenValue){TokenValue_I64, ._i64 = res};
-            } break;
-            case 'F': {
-                double res = TokenDeserializer__read_float(&deserializer, '\n');
-                t.value = (TokenValue){TokenValue_F64, ._f64 = res};
-            } break;
-            case 'S': {
-                c11_string* res = TokenDeserializer__read_string_from_hex(&deserializer, '\n');
-                t.value = (TokenValue){TokenValue_STR, ._str = res};
-            } break;
-            default: t.value = EmptyTokenValue; break;
-        }
-        c11_vector__push(Token, &self->nexts, t);
-    }
-    return NULL;
-}
-
 Error* Lexer__process(SourceData_ src, TokenArray* out_tokens) {
     Lexer lexer;
     Lexer__ctor(&lexer, src);
 
-    if(src->is_precompiled) {
-        Error* err = from_precompiled(&lexer);
-        // TODO: set out tokens
-        Lexer__dtor(&lexer);
-        return err;
-    }
     // push initial tokens
     Token sof =
         {TK_SOF, lexer.token_start, 0, lexer.current_line, lexer.brackets_level, EmptyTokenValue};
@@ -623,102 +543,6 @@ Error* Lexer__process(SourceData_ src, TokenArray* out_tokens) {
     return NULL;
 }
 
-Error* Lexer__process_and_dump(SourceData_ src, c11_string** out) {
-    assert(!src->is_precompiled);
-    TokenArray nexts;  // output tokens
-    Error* err = Lexer__process(src, &nexts);
-    if(err) return err;
-
-    c11_sbuf ss;
-    c11_sbuf__ctor(&ss);
-
-    // L1: version string
-    c11_sbuf__write_cstr(&ss, "pkpy:" PK_VERSION "\n");
-    // L2: mode
-    c11_sbuf__write_int(&ss, (int)src->mode);
-    c11_sbuf__write_char(&ss, '\n');
-
-    c11_smallmap_s2n token_indices;
-    c11_smallmap_s2n__ctor(&token_indices);
-
-    c11__foreach(Token, &nexts, token) {
-        if(is_raw_string_used(token->type)) {
-            c11_sv token_sv = {token->start, token->length};
-            if(!c11_smallmap_s2n__contains(&token_indices, token_sv)) {
-                c11_smallmap_s2n__set(&token_indices, token_sv, 0);
-            }
-        }
-    }
-    // L3: raw string count
-    c11_sbuf__write_char(&ss, '=');
-    c11_sbuf__write_int(&ss, token_indices.count);
-    c11_sbuf__write_char(&ss, '\n');
-
-    uint16_t index = 0;
-    for(int i = 0; i < token_indices.count; i++) {
-        c11_smallmap_s2n_KV* kv = c11__at(c11_smallmap_s2n_KV, &token_indices, i);
-        // L4: raw strings
-        c11_sbuf__write_cstrn(&ss, kv->key.data, kv->key.size);
-        kv->value = index++;
-    }
-
-    // L5: token count
-    c11_sbuf__write_char(&ss, '=');
-    c11_sbuf__write_int(&ss, nexts.count);
-    c11_sbuf__write_char(&ss, '\n');
-
-    for(int i = 0; i < nexts.count; i++) {
-        const Token* token = c11__at(Token, &nexts, i);
-        c11_sbuf__write_int(&ss, (int)token->type);
-        c11_sbuf__write_char(&ss, ',');
-
-        if(is_raw_string_used(token->type)) {
-            uint16_t* p =
-                c11_smallmap_s2n__try_get(&token_indices, (c11_sv){token->start, token->length});
-            assert(p != NULL);
-            c11_sbuf__write_int(&ss, (int)*p);
-            c11_sbuf__write_char(&ss, ',');
-        }
-        if(i > 0 && c11__getitem(Token, &nexts, i - 1).line == token->line) {
-            c11_sbuf__write_char(&ss, ',');
-        } else {
-            c11_sbuf__write_int(&ss, token->line);
-            c11_sbuf__write_char(&ss, ',');
-        }
-
-        if(i > 0 && c11__getitem(Token, &nexts, i - 1).brackets_level == token->brackets_level) {
-            c11_sbuf__write_char(&ss, ',');
-        } else {
-            c11_sbuf__write_int(&ss, token->brackets_level);
-            c11_sbuf__write_char(&ss, ',');
-        }
-        // visit token value
-        switch(token->value.index) {
-            case TokenValue_EMPTY: break;
-            case TokenValue_I64:
-                c11_sbuf__write_char(&ss, 'I');
-                c11_sbuf__write_int(&ss, token->value._i64);
-                break;
-            case TokenValue_F64:
-                c11_sbuf__write_char(&ss, 'F');
-                c11_sbuf__write_f64(&ss, token->value._f64, -1);
-                break;
-            case TokenValue_STR: {
-                c11_sbuf__write_char(&ss, 'S');
-                c11_sv sv = c11_string__sv(token->value._str);
-                for(int i = 0; i < sv.size; i++) {
-                    c11_sbuf__write_hex(&ss, sv.data[i], false);
-                }
-                break;
-            }
-        }
-        c11_sbuf__write_char(&ss, '\n');
-    }
-    *out = c11_sbuf__submit(&ss);
-    c11_smallmap_s2n__dtor(&token_indices);
-    return NULL;
-}
-
 void TokenArray__dtor(TokenArray* self) {
     Token* data = self->data;
     for(int i = 0; i < self->count; i++) {