blueloveTH 2 years ago
parent
commit
c607d11bd6
8 changed files with 612 additions and 370 deletions
  1. 0 20
      src/codeobject.h
  2. 8 5
      src/common.h
  3. 444 282
      src/compiler.h
  4. 10 5
      src/error.h
  5. 121 28
      src/expr.h
  6. 5 5
      src/lexer.h
  7. 0 1
      src/main.cpp
  8. 24 24
      src/vm.h

+ 0 - 20
src/codeobject.h

@@ -92,26 +92,6 @@ struct CodeObject {
     void _mark() const {
         for(PyObject* v : consts) OBJ_MARK(v);
     }
-
-    /************************************************/
-    int _curr_block_i = 0;
-    int _rvalue = 0;
-    bool _is_compiling_class = false;
-    bool _is_curr_block_loop() const {
-        return blocks[_curr_block_i].type == FOR_LOOP || blocks[_curr_block_i].type == WHILE_LOOP;
-    }
-
-    void _enter_block(CodeBlockType type){
-        blocks.push_back(CodeBlock{type, _curr_block_i, (int)codes.size()});
-        _curr_block_i = blocks.size()-1;
-    }
-
-    void _exit_block(){
-        blocks[_curr_block_i].end = codes.size();
-        _curr_block_i = blocks[_curr_block_i].parent;
-        if(_curr_block_i < 0) UNREACHABLE();
-    }
-    /************************************************/
 };
 
 

+ 8 - 5
src/common.h

@@ -68,8 +68,7 @@ struct Type {
 	operator int() const noexcept { return this->index; }
 };
 
-//#define THREAD_LOCAL thread_local
-#define THREAD_LOCAL
+#define THREAD_LOCAL	// thread_local
 #define CPP_LAMBDA(x) ([](VM* vm, Args& args) { return x; })
 #define CPP_NOT_IMPLEMENTED() ([](VM* vm, Args& args) { vm->NotImplementedError(); return vm->None; })
 
@@ -79,9 +78,9 @@ struct Type {
 #define UNREACHABLE() throw std::runtime_error( __FILE__ + std::string(":") + std::to_string(__LINE__) + " UNREACHABLE()!");
 #endif
 
-const float kLocalsLoadFactor = 0.67f;
-const float kInstAttrLoadFactor = 0.67f;
-const float kTypeAttrLoadFactor = 0.5f;
+inline const float kLocalsLoadFactor = 0.67f;
+inline const float kInstAttrLoadFactor = 0.67f;
+inline const float kTypeAttrLoadFactor = 0.5f;
 
 static_assert(sizeof(i64) == sizeof(int*));
 static_assert(sizeof(f64) == sizeof(int*));
@@ -114,7 +113,11 @@ public:
 	size_t size() const { return vec.size(); }
 	T& top(){ return vec.back(); }
 	const T& top() const { return vec.back(); }
+	T popx(){ T t = std::move(vec.back()); vec.pop_back(); return t; }
 	const std::vector<T>& data() const { return vec; }
 };
 
+struct Expression;
+typedef std::unique_ptr<Expression> Expression_;
+
 } // namespace pkpy

File diff suppressed because it is too large
+ 444 - 282
src/compiler.h


+ 10 - 5
src/error.h

@@ -1,6 +1,7 @@
 #pragma once
 
 #include "namedict.h"
+#include "str.h"
 #include "tuplelist.h"
 
 namespace pkpy{
@@ -22,7 +23,7 @@ enum CompileMode {
 };
 
 struct SourceData {
-    const char* source;
+    std::string source;
     Str filename;
     std::vector<const char*> line_starts;
     CompileMode mode;
@@ -38,11 +39,17 @@ struct SourceData {
     }
 
     SourceData(const char* source, Str filename, CompileMode mode) {
-        source = strdup(source);
         // Skip utf8 BOM if there is any.
         if (strncmp(source, "\xEF\xBB\xBF", 3) == 0) source += 3;
+        // Remove all '\r'
+        StrStream ss;
+        while(*source != '\0'){
+            if(*source != '\r') ss << *source;
+            source++;
+        }
+
         this->filename = filename;
-        this->source = source;
+        this->source = ss.str();
         line_starts.push_back(source);
         this->mode = mode;
     }
@@ -65,8 +72,6 @@ struct SourceData {
         }
         return ss.str();
     }
-
-    ~SourceData() { free((void*)source); }
 };
 
 class Exception {

+ 121 - 28
src/expr.h

@@ -2,107 +2,200 @@
 
 #include "codeobject.h"
 #include "common.h"
-#include "parser.h"
+#include "lexer.h"
 #include "error.h"
 #include "ceval.h"
-#include <memory>
 
 namespace pkpy{
 
-struct Expression;
-typedef std::unique_ptr<Expression> Expression_;
-
 struct Expression{
-    std::vector<Expression_> children;
     virtual Str to_string() const = 0;
 };
 
 struct NameExpr: Expression{
     Str name;
     NameScope scope;
-    NameExpr(Str name, NameScope scope): name(name), scope(scope) {}
+    NameExpr(const Str& name, NameScope scope): name(name), scope(scope) {}
+    NameExpr(Str&& name, NameScope scope): name(std::move(name)), scope(scope) {}
     Str to_string() const override { return name; }
 };
 
-struct GroupExpr: Expression{
-    Expression_ expr;
-    GroupExpr(Expression_ expr): expr(std::move(expr)) {}
-    Str to_string() const override { return "()"; }
-};
-
 struct UnaryExpr: Expression{
     TokenIndex op;
-    UnaryExpr(TokenIndex op): op(op) {}
+    Expression_ child;
+    UnaryExpr(TokenIndex op, Expression_&& child): op(op), child(std::move(child)) {}
     Str to_string() const override { return TK_STR(op); }
 };
 
 struct NotExpr: Expression{
+    Expression_ child;
+    NotExpr(Expression_&& child): child(std::move(child)) {}
     Str to_string() const override { return "not"; }
 };
 
 struct AndExpr: Expression{
+    Expression_ lhs;
+    Expression_ rhs;
+    AndExpr(Expression_&& lhs, Expression_&& rhs): lhs(std::move(lhs)), rhs(std::move(rhs)) {}
     Str to_string() const override { return "and"; }
 };
 
 struct OrExpr: Expression{
+    Expression_ lhs;
+    Expression_ rhs;
+    OrExpr(Expression_&& lhs, Expression_&& rhs): lhs(std::move(lhs)), rhs(std::move(rhs)) {}
     Str to_string() const override { return "or"; }
 };
 
-// None, True, False, ...
-struct SpecialValueExpr: Expression{
+// [None, True, False, ...]
+struct SpecialLiteralExpr: Expression{
     TokenIndex token;
-    SpecialValueExpr(TokenIndex token): token(token) {}
+    SpecialLiteralExpr(TokenIndex token): token(token) {}
     Str to_string() const override { return TK_STR(token); }
+
+    void gen(){
+        // switch (token) {
+        //     case TK("None"):    emit(OP_LOAD_NONE);  break;
+        //     case TK("True"):    emit(OP_LOAD_TRUE);  break;
+        //     case TK("False"):   emit(OP_LOAD_FALSE); break;
+        //     case TK("..."):     emit(OP_LOAD_ELLIPSIS); break;
+        //     default: UNREACHABLE();
+        // }
+    }
 };
 
 // @num, @str which needs to invoke OP_LOAD_CONST
 struct LiteralExpr: Expression{
-    PyObject* value;
-    LiteralExpr(PyObject* value): value(value) {}
+    TokenValue value;
+    LiteralExpr(TokenValue value): value(value) {}
     Str to_string() const override { return "literal"; }
 };
 
+struct SliceExpr: Expression{
+    Expression_ start;
+    Expression_ stop;
+    Expression_ step;
+    SliceExpr(Expression_&& start, Expression_&& stop, Expression_&& step):
+        start(std::move(start)), stop(std::move(stop)), step(std::move(step)) {}
+    Str to_string() const override { return "slice"; }
+};
+
 struct ListExpr: Expression{
+    std::vector<Expression_> items;
     Str to_string() const override { return "[]"; }
 };
 
 struct DictExpr: Expression{
+    std::vector<Expression_> items;     // each item is a DictItemExpr
     Str to_string() const override { return "{}"; }
 };
 
+struct SetExpr: Expression{
+    std::vector<Expression_> items;
+    Str to_string() const override { return "{}"; }
+};
+
+
+struct TupleExpr: Expression{
+    std::vector<Expression_> items;
+    TupleExpr(std::vector<Expression_>&& items): items(std::move(items)) {}
+    Str to_string() const override { return "(a, b, c)"; }
+};
+
+struct CompExpr: Expression{
+    Expression_ expr;       // loop expr
+    Expression_ vars;       // loop vars
+    Expression_ iter;       // loop iter
+    Expression_ cond;       // optional if condition
+    virtual void emit_expr() = 0;
+};
+
+// a:b
+struct DictItemExpr: Expression{
+    Expression_ key;
+    Expression_ value;
+    DictItemExpr(Expression_&& key, Expression_&& value)
+        : key(std::move(key)), value(std::move(value)) {}
+    Str to_string() const override { return "dict item"; }
+};
+
+struct ListCompExpr: CompExpr{
+};
+
+struct DictCompExpr: CompExpr{
+};
+
+struct SetCompExpr: CompExpr{
+};
+
 struct LambdaExpr: Expression{
+    Function func;
+    NameScope scope;
+    LambdaExpr(Function&& func, NameScope scope): func(std::move(func)), scope(scope) {}
     Str to_string() const override { return "lambda"; }
 };
 
 struct FStringExpr: Expression{
+    Str src;
+    FStringExpr(const Str& src): src(src) {}
     Str to_string() const override { return "@fstr"; }
 };
 
+struct SubscrExpr: Expression{
+    Expression_ a;
+    Expression_ b;
+    SubscrExpr(Expression_&& a, Expression_&& b): a(std::move(a)), b(std::move(b)) {}
+    Str to_string() const override { return "a[b]"; }
+};
+
 struct AttribExpr: Expression{
+    Expression_ a;
+    Str b;
+    AttribExpr(Expression_ a, const Str& b): a(std::move(a)), b(b) {}
+    AttribExpr(Expression_ a, Str&& b): a(std::move(a)), b(std::move(b)) {}
     Str to_string() const override { return "."; }
 };
 
+struct AssignExpr: Expression{
+    Expression_ lhs;
+    Expression_ rhs;
+    AssignExpr(Expression_&& lhs, Expression_&& rhs): lhs(std::move(lhs)), rhs(std::move(rhs)) {}
+    Str to_string() const override { return "="; }
+};
+
+struct InplaceAssignExpr: Expression{
+    TokenIndex op;
+    Expression_ lhs;
+    Expression_ rhs;
+    InplaceAssignExpr(TokenIndex op, Expression_&& lhs, Expression_&& rhs)
+        : op(op), lhs(std::move(lhs)), rhs(std::move(rhs)) {}
+    Str to_string() const override { return TK_STR(op); }
+};
+
+
 struct CallExpr: Expression{
+    std::vector<Expression_> args;
+    std::vector<std::pair<Str, Expression_>> kwargs;
     Str to_string() const override { return "()"; }
 };
 
 struct BinaryExpr: Expression{
     TokenIndex op;
-    BinaryExpr(TokenIndex op): op(op) {}
+    Expression_ lhs;
+    Expression_ rhs;
+    BinaryExpr(TokenIndex op, Expression_&& lhs, Expression_&& rhs)
+        : op(op), lhs(std::move(lhs)), rhs(std::move(rhs)) {}
     Str to_string() const override { return TK_STR(op); }
 };
 
 struct TernaryExpr: Expression{
+    Expression_ cond;
+    Expression_ true_expr;
+    Expression_ false_expr;
+    TernaryExpr(Expression_&& cond, Expression_&& true_expr, Expression_&& false_expr)
+        : cond(std::move(cond)), true_expr(std::move(true_expr)), false_expr(std::move(false_expr)) {}
     Str to_string() const override { return "?"; }
 };
 
-struct AssignExpr: Expression{
-    Str to_string() const override { return "="; }
-};
-
-struct CommaExpr: Expression{
-    Str to_string() const override { return ","; }
-};
-
 
 } // namespace pkpy

+ 5 - 5
src/lexer.h

@@ -68,11 +68,12 @@ struct Token{
   }
 };
 
-// https://docs.python.org/3/reference/expressions.html
+// https://docs.python.org/3/reference/expressions.html#operator-precedence
 enum Precedence {
   PREC_NONE,
   PREC_ASSIGNMENT,    // =
   PREC_COMMA,         // ,
+  PREC_SLICE,         // :   (only available inside a subscript expression)
   PREC_TERNARY,       // ?:
   PREC_LOGICAL_OR,    // or
   PREC_LOGICAL_AND,   // and
@@ -135,7 +136,7 @@ struct Lexer {
         if(brackets_level > 0) return true;
         int spaces = eat_spaces();
         if(peekchar() == '#') skip_line_comment();
-        if(peekchar() == '\0' || peekchar() == '\n' || peekchar() == '\r') return true;
+        if(peekchar() == '\0' || peekchar() == '\n') return true;
         // https://docs.python.org/3/reference/lexical_analysis.html#indentation
         if(spaces > indents.top()){
             indents.push(spaces);
@@ -428,7 +429,6 @@ struct Lexer {
                         add_token_2('=', TK("/"), TK("/="));
                     }
                     return true;
-                case '\r': break;       // just ignore '\r'
                 case ' ': case '\t': eat_spaces(); break;
                 case '\n': {
                     add_token(TK("@eol"));
@@ -493,8 +493,8 @@ struct Lexer {
 
     Lexer(shared_ptr<SourceData> src) {
         this->src = src;
-        this->token_start = src->source;
-        this->curr_char = src->source;
+        this->token_start = src->source.c_str();
+        this->curr_char = src->source.c_str();
         this->nexts.push_back(Token{TK("@sof"), token_start, 0, current_line});
         this->indents.push(0);
     }

+ 0 - 1
src/main.cpp

@@ -21,7 +21,6 @@ std::string getline(bool* eof=nullptr) {
     std::string output;
     output.resize(length);
     WideCharToMultiByte(CP_UTF8, 0, wideInput.c_str(), (int)wideInput.length(), &output[0], length, NULL, NULL);
-    if(!output.empty() && output.back() == '\r') output.pop_back();
     return output;
 }
 

+ 24 - 24
src/vm.h

@@ -357,30 +357,30 @@ inline void CodeObject::optimize(VM* vm){
     perfect_locals_capacity = find_next_capacity(base_n);
     perfect_hash_seed = find_perfect_hash_seed(perfect_locals_capacity, keys);
 
-    for(int i=1; i<codes.size(); i++){
-        if(codes[i].op == OP_UNARY_NEGATIVE && codes[i-1].op == OP_LOAD_CONST){
-            codes[i].op = OP_NO_OP;
-            int pos = codes[i-1].arg;
-            consts[pos] = vm->num_negated(consts[pos]);
-        }
-
-        if(i>=2 && codes[i].op == OP_BUILD_INDEX){
-            const Bytecode& a = codes[i-1];
-            const Bytecode& x = codes[i-2];
-            if(codes[i].arg == 1){
-                if(a.op == OP_LOAD_NAME && x.op == OP_LOAD_NAME){
-                    codes[i].op = OP_FAST_INDEX;
-                }else continue;
-            }else{
-                if(a.op == OP_LOAD_NAME_REF && x.op == OP_LOAD_NAME_REF){
-                    codes[i].op = OP_FAST_INDEX_REF;
-                }else continue;
-            }
-            codes[i].arg = (a.arg << 16) | x.arg;
-            codes[i-1].op = OP_NO_OP;
-            codes[i-2].op = OP_NO_OP;
-        }
-    }
+    // for(int i=1; i<codes.size(); i++){
+    //     if(codes[i].op == OP_UNARY_NEGATIVE && codes[i-1].op == OP_LOAD_CONST){
+    //         codes[i].op = OP_NO_OP;
+    //         int pos = codes[i-1].arg;
+    //         consts[pos] = vm->num_negated(consts[pos]);
+    //     }
+
+    //     if(i>=2 && codes[i].op == OP_BUILD_INDEX){
+    //         const Bytecode& a = codes[i-1];
+    //         const Bytecode& x = codes[i-2];
+    //         if(codes[i].arg == 1){
+    //             if(a.op == OP_LOAD_NAME && x.op == OP_LOAD_NAME){
+    //                 codes[i].op = OP_FAST_INDEX;
+    //             }else continue;
+    //         }else{
+    //             if(a.op == OP_LOAD_NAME_REF && x.op == OP_LOAD_NAME_REF){
+    //                 codes[i].op = OP_FAST_INDEX_REF;
+    //             }else continue;
+    //         }
+    //         codes[i].arg = (a.arg << 16) | x.arg;
+    //         codes[i-1].op = OP_NO_OP;
+    //         codes[i-2].op = OP_NO_OP;
+    //     }
+    // }
 
     // pre-compute sn in co_consts
     for(int i=0; i<consts.size(); i++){

Some files were not shown because too many files changed in this diff