blueloveTH пре 1 година
родитељ
комит
0d41d40456
6 измењених фајлова са 113 додато и 26 уклоњено
  1. 2 0
      include/pocketpy/common.h
  2. 15 1
      include/pocketpy/compiler.h
  3. 1 0
      include/pocketpy/error.h
  4. 83 18
      src/compiler.cpp
  5. 6 1
      src/error.cpp
  6. 6 6
      src/str.cpp

+ 2 - 0
include/pocketpy/common.h

@@ -170,4 +170,6 @@ inline constexpr bool is_integral_v = std::is_same_v<T, char>
 template<typename T>
 inline constexpr bool is_floating_point_v = std::is_same_v<T, float> || std::is_same_v<T, double>;
 
+// Lowercase hexadecimal digits, indexed by nibble value (0-15).
+// constexpr so the shared table cannot be re-pointed at run time.
+inline constexpr const char* PK_HEX_TABLE = "0123456789abcdef";
+
 } // namespace pkpy

+ 15 - 1
include/pocketpy/compiler.h

@@ -121,8 +121,22 @@ class Compiler {
 
 public:
     Compiler(VM* vm, std::string_view source, const Str& filename, CompileMode mode, bool unknown_global_scope=false);
-    void precompile();
+    Str precompile();
+    void from_precompiled(const char* source);
     CodeObject_ compile();
 };
 
+// Cursor over the textual token stream decoded by Compiler::from_precompiled().
+// Every read_* helper consumes the text in front of the delimiter `c` and the
+// delimiter itself.
+struct TokenDeserializer{
+    const char* curr;       // next unread character
+    const char* source;     // start of the buffer given to the constructor
+
+    TokenDeserializer(const char* source): curr(source), source(source) {}
+
+    // Returns the character under the cursor, then advances by one.
+    char read_char(){
+        const char ch = *curr;
+        ++curr;
+        return ch;
+    }
+
+    std::string_view read_string(char c);   // raw text before `c`
+    Str read_string_from_hex(char c);       // hex digits before `c`, decoded to bytes
+    i64 read_int(char c);                   // base-10 integer before `c`
+    f64 read_float(char c);                 // floating-point number before `c`
+};
+
 } // namespace pkpy

+ 1 - 0
include/pocketpy/error.h

@@ -31,6 +31,7 @@ struct SourceData {
 
     Str source;
     pod_vector<const char*> line_starts;
+    bool is_precompiled;
     
     SourceData(std::string_view source, const Str& filename, CompileMode mode);
     SourceData(const Str& filename, CompileMode mode);

+ 83 - 18
src/compiler.cpp

@@ -1225,40 +1225,67 @@ __EAT_DOTS_END:
         init_pratt_rules();
     }
 
-    void Compiler::precompile(){
+    // Runs the lexer and returns the token stream serialized as text.
+    // Layout: line 1 is "pkpy:" followed by PK_VERSION, line 2 is '=' followed by
+    // the token count, then one line per token: "type,line,brackets_level,"
+    // plus an optional value tagged 'I' (i64), 'F' (f64) or 'S' (Str bytes
+    // written through write_hex).
+    Str Compiler::precompile(){
         auto tokens = lexer.run();
         SStream ss;
-        ss << PK_VERSION << '\n';                   // L1: version string
-        ss << lexer.src->filename << '\n';          // L2: filename
-        ss << mode() << '\n';                       // L3: compile mode
-        ss << (int)unknown_global_scope << '\n';    // L4: unknown global scope
-        ss << '=' << (int)tokens.size() << '\n';    // L5: token count
-        for(auto token: lexer.run()){
-            ss << (int)token.type << '\n';
-            int offset = token.start - lexer.src->source.c_str();
-            ss << offset << '\n';
-            ss << token.length << '\n';
-            ss << token.line << '\n';
-            ss << token.brackets_level << '\n';
+        ss << "pkpy:" PK_VERSION << '\n';           // L1: magic prefix + version string
+        ss << "=" << (int)tokens.size() << '\n';    // L2: token count
+        for(auto token: tokens){
+            ss << (int)token.type << ',';
+            ss << token.line << ',';
+            ss << token.brackets_level << ',';
             // visit token value
             std::visit([&ss](auto&& arg){
                 using T = std::decay_t<decltype(arg)>;
                 if constexpr(std::is_same_v<T, i64>){
-                    ss << 'i' << arg << '\n';
+                    ss << 'I' << arg;
                 }else if constexpr(std::is_same_v<T, f64>){
-                    ss << 'f' << arg << '\n';
+                    ss << 'F' << arg;
                 }else if constexpr(std::is_same_v<T, Str>){
-                    ss << 's' << arg.escape() << '\n';
+                    ss << 'S';
+                    // NOTE(review): from_precompiled expects two hex digits per byte here — confirm write_hex's default
+                    for(char c: arg) ss.write_hex((unsigned char)c);
                 }
+                // every record ends with a newline, including tokens without a value
+                ss << '\n';
             }, token.value);
         }
-        std::cout << ss.str() << std::endl;
+        return ss.str();
+    }
+
+    // Fills `tokens` from precompiled text instead of running the lexer.
+    // Expected layout: "pkpy:<version>\n=<count>\n" followed by <count> records
+    // of the form "type,line,brackets_level,[I<int>|F<float>|S<hex>]\n".
+    // Reports a SyntaxError when the embedded version differs from PK_VERSION.
+    void Compiler::from_precompiled(const char* source){
+        TokenDeserializer reader(source);
+
+        reader.curr += 5;     // skip "pkpy:"
+        const std::string_view version = reader.read_string('\n');
+        if(version != PK_VERSION){
+            SyntaxError(_S("precompiled version mismatch: ", version, "!=" PK_VERSION));
+        }
+
+        reader.curr += 1;     // skip '='
+        const i64 n_tokens = reader.read_int('\n');
+
+        // The serialized form carries no source span, so every token points at
+        // the beginning of the source buffer with zero length.
+        const char* const span_start = lexer.src->source.c_str();
+
+        for(int k=0; k<n_tokens; k++){
+            Token tok;
+            tok.start = span_start;
+            tok.length = 0;
+            // Fields are consumed in the order they appear in the record.
+            tok.type = (unsigned char)reader.read_int(',');
+            tok.line = (int)reader.read_int(',');
+            tok.brackets_level = (int)reader.read_int(',');
+
+            // One tag character selects the payload; any other character
+            // (the record's newline for a value-less token) leaves it empty.
+            const char tag = reader.read_char();
+            if(tag == 'I'){
+                tok.value = reader.read_int('\n');
+            }else if(tag == 'F'){
+                tok.value = reader.read_float('\n');
+            }else if(tag == 'S'){
+                tok.value = reader.read_string_from_hex('\n');
+            }else{
+                tok.value = {};
+            }
+            tokens.push_back(tok);
+        }
+    }
 
     CodeObject_ Compiler::compile(){
         PK_ASSERT(i == 0)       // make sure it is the first time to compile
 
-        tokens = lexer.run();
+        if(lexer.src->is_precompiled){
+            from_precompiled(lexer.src->source.c_str());
+        }else{
+            this->tokens = lexer.run();
+        }
+
         CodeObject_ code = push_global_context();
 
         advance();          // skip @sof, so prev() is always valid
@@ -1296,4 +1323,42 @@ __EAT_DOTS_END:
         e.st_push(src, lineno, cursor, "");
         throw e;
     }
+
+    // Returns the text between the cursor and the next occurrence of `c`, then
+    // moves the cursor past that delimiter. The returned view aliases the input
+    // buffer. If the buffer's NUL terminator is reached before `c`, the scan
+    // stops there: the remaining text is returned and the cursor stays on the
+    // terminator, so a truncated stream cannot run the reader off the buffer.
+    std::string_view TokenDeserializer::read_string(char c){
+        const char* start = curr;
+        while(*curr != c && *curr != '\0') curr++;
+        std::string_view retval(start, curr-start);
+        if(*curr == c) curr++;     // skip the delimiter only when one was found
+        return retval;
+    }
+
+    // Reads the text in front of the delimiter `c` and decodes it as lowercase
+    // hex, two digits per byte with the high nibble first. The decoded bytes go
+    // into a buffer from pool64_alloc, which is NUL-terminated and passed to the
+    // Str constructor together with the byte count.
+    // Calls PK_FATAL_ERROR() on a non-hex character or an odd number of digits.
+    Str TokenDeserializer::read_string_from_hex(char c){
+        std::string_view s = read_string(c);
+        // An odd digit count would make the loop below index one past the view.
+        if(s.size() % 2 != 0){ PK_FATAL_ERROR(); }
+        const int n_bytes = (int)(s.size() / 2);
+
+        // -1 marks a character that is not a lowercase hex digit
+        auto nibble = [](char ch) -> int {
+            if(ch>='0' && ch<='9') return ch-'0';
+            if(ch>='a' && ch<='f') return ch-'a'+10;
+            return -1;
+        };
+
+        char* buffer = (char*)pool64_alloc(s.size()/2 + 1);
+        for(int k=0; k<n_bytes; k++){
+            const int high = nibble(s[2*k]);
+            const int low = nibble(s[2*k+1]);
+            if(high < 0 || low < 0){ PK_FATAL_ERROR(); }
+            buffer[k] = (char)((high << 4) | low);
+        }
+        buffer[n_bytes] = '\0';     // the extra allocated byte holds the terminator
+        return std::pair<char*, int>(buffer, n_bytes);
+    }
+
+    // Consumes the text in front of the delimiter `c` and parses it as a
+    // base-10 integer; PK_ASSERT checks that parse_int reported success.
+    i64 TokenDeserializer::read_int(char c){
+        i64 value;
+        const std::string_view digits = read_string(c);
+        const IntParsingResult status = parse_int(digits, &value, 10);
+        PK_ASSERT(status == IntParsingResult::Success);
+        return value;
+    }
+
+    // Consumes the text in front of the delimiter `c` and converts it with
+    // std::stod, which needs an owning std::string copy of the view.
+    f64 TokenDeserializer::read_float(char c){
+        const std::string text(read_string(c));
+        return std::stod(text);
+    }
 }   // namespace pkpy

+ 6 - 1
src/error.cpp

@@ -13,6 +13,11 @@ namespace pkpy{
             index++;
         }
         this->source = ss.str();
+        // A precompiled source starts with the 5-character tag "pkpy:".
+        // substr(0, 5) takes that prefix and clamps on shorter sources, whereas
+        // substr(5) would select the text *after* the first five characters.
+        this->is_precompiled = (this->source.sv().substr(0, 5) == "pkpy:");
         line_starts.push_back(this->source.c_str());
     }
 
@@ -35,7 +40,7 @@ namespace pkpy{
         SStream ss;
         ss << "  " << "File \"" << filename << "\", line " << lineno;
         if(!name.empty()) ss << ", in " << name;
-        if(!source.empty()){
+        if(!source.empty() && !is_precompiled){
             ss << '\n';
             std::pair<const char*,const char*> pair = _get_line(lineno);
             Str line = "<?>";

+ 6 - 6
src/str.cpp

@@ -255,8 +255,8 @@ int utf8len(unsigned char c, bool suppress){
                 default:
                     if ('\x00' <= c && c <= '\x1f') {
                         ss << "\\x"; // << std::hex << std::setw(2) << std::setfill('0') << (int)c;
-                        ss << "0123456789abcdef"[c >> 4];
-                        ss << "0123456789abcdef"[c & 0xf];
+                        ss << PK_HEX_TABLE[c >> 4];
+                        ss << PK_HEX_TABLE[c & 0xf];
                     } else {
                         ss << c;
                     }
@@ -495,11 +495,11 @@ int utf8len(unsigned char c, bool suppress){
         unsigned char high = c >> 4;
         unsigned char low = c & 0xf;
         if(non_zero){
-            if(high) (*this) << "0123456789abcdef"[high];
-            if(high || low) (*this) << "0123456789abcdef"[low];
+            if(high) (*this) << PK_HEX_TABLE[high];
+            if(high || low) (*this) << PK_HEX_TABLE[low];
         }else{
-            (*this) << "0123456789abcdef"[high];
-            (*this) << "0123456789abcdef"[low];
+            (*this) << PK_HEX_TABLE[high];
+            (*this) << PK_HEX_TABLE[low];
         }
     }