blueloveTH 2 лет назад
Родитель
Коммит
6bb307734c
6 измененных файлов с 182 добавлено и 194 удалено
  1. 37 15
      src/compiler.h
  2. 35 61
      src/expr.h
  3. 22 19
      src/iter.h
  4. 13 14
      src/lexer.h
  5. 2 4
      src/obj.h
  6. 73 81
      src/vm.h

+ 37 - 15
src/compiler.h

@@ -582,20 +582,41 @@ class Compiler {
     }
 
     bool try_compile_assignment(){
-        //     switch (op) {
-        //         case TK("+="):      emit(OP_BINARY_OP, 0);  break;
-        //         case TK("-="):      emit(OP_BINARY_OP, 1);  break;
-        //         case TK("*="):      emit(OP_BINARY_OP, 2);  break;
-        //         case TK("/="):      emit(OP_BINARY_OP, 3);  break;
-        //         case TK("//="):     emit(OP_BINARY_OP, 4);  break;
-        //         case TK("%="):      emit(OP_BINARY_OP, 5);  break;
-        //         case TK("<<="):     emit(OP_BITWISE_OP, 0);  break;
-        //         case TK(">>="):     emit(OP_BITWISE_OP, 1);  break;
-        //         case TK("&="):      emit(OP_BITWISE_OP, 2);  break;
-        //         case TK("|="):      emit(OP_BITWISE_OP, 3);  break;
-        //         case TK("^="):      emit(OP_BITWISE_OP, 4);  break;
-        //         default: UNREACHABLE();
-        //     }
+        Expr_ lhs = ctx()->s_expr.popx();
+        switch (curr().type) {
+            // case TK("+="):      lhs->emit(ctx()); advance(); emit(OP_BINARY_OP, 0);  break;
+            // case TK("-="):      lhs->emit(ctx()); advance(); emit(OP_BINARY_OP, 1);  break;
+            // case TK("*="):      lhs->emit(ctx()); advance(); emit(OP_BINARY_OP, 2);  break;
+            // case TK("/="):      lhs->emit(ctx()); advance(); emit(OP_BINARY_OP, 3);  break;
+            // case TK("//="):     lhs->emit(ctx()); advance(); emit(OP_BINARY_OP, 4);  break;
+            // case TK("%="):      lhs->emit(ctx()); advance(); emit(OP_BINARY_OP, 5);  break;
+            // case TK("<<="):     lhs->emit(ctx()); advance(); emit(OP_BITWISE_OP, 0);  break;
+            // case TK(">>="):     lhs->emit(ctx()); advance(); emit(OP_BITWISE_OP, 1);  break;
+            // case TK("&="):      lhs->emit(ctx()); advance(); emit(OP_BITWISE_OP, 2);  break;
+            // case TK("|="):      lhs->emit(ctx()); advance(); emit(OP_BITWISE_OP, 3);  break;
+            // case TK("^="):      lhs->emit(ctx()); advance(); emit(OP_BITWISE_OP, 4);  break;
+            // case TK("="):       advance(); break;
+            case TK("+="): case TK("-="): case TK("*="): case TK("/="): case TK("//="): case TK("%="):
+            case TK("<<="): case TK(">>="): case TK("&="): case TK("|="): case TK("^="): {
+                advance();
+                auto e = make_expr<BinaryExpr>();
+                e->op = prev().type;
+                e->lhs = lhs;       // here should be a copy
+                EXPR_TUPLE();
+                e->rhs = ctx()->s_expr.popx();
+                // ...
+            } break;
+            case TK("="): advance(); break;
+            default: return false;
+        }
+        if(prev().type == TK("=")){
+            EXPR_TUPLE();
+            Expr_ rhs = ctx()->s_expr.popx();
+            // do assign here
+            // lhs = rhs
+            return true;
+        }
+        return true;
     }
 
     void compile_stmt() {
@@ -704,8 +725,9 @@ class Compiler {
             /*************************************************/
             // handle dangling expression or assignment
             default: {
-                EXPR_TUPLE(true);
+                EXPR_TUPLE();
                 if(!try_compile_assignment()){
+                    ctx()->emit_expr();
                     if(mode()==REPL_MODE && name_scope()==NAME_GLOBAL){
                         ctx()->emit(OP_PRINT_EXPR, BC_NOARG, BC_KEEPLINE);
                     }else{

+ 35 - 61
src/expr.h

@@ -18,6 +18,7 @@ struct Expr{
 
     virtual std::vector<const Expr*> children() const { return {}; }
     virtual bool is_starred() const { return false; }
+    virtual bool is_literal() const { return false; }
 
     // for OP_DELETE_XXX
     virtual bool emit_del(CodeEmitContext* ctx) { return false; }
@@ -162,20 +163,6 @@ struct StarredExpr: Expr{
     }
 };
 
-// PASS
-struct NegatedExpr: Expr{
-    Expr_ child;
-    NegatedExpr(Expr_&& child): child(std::move(child)) {}
-    Str str() const override { return "-"; }
-
-    std::vector<const Expr*> children() const override { return {child.get()}; }
-
-    void emit(CodeEmitContext* ctx) override {
-        child->emit(ctx);
-        ctx->emit(OP_UNARY_NEGATIVE, BC_NOARG, line);
-    }
-};
-
 // PASS
 struct NotExpr: Expr{
     Expr_ child;
@@ -265,19 +252,48 @@ struct LiteralExpr: Expr{
         if(std::holds_alternative<i64>(value)){
             obj = VAR(std::get<i64>(value));
         }
-
         if(std::holds_alternative<f64>(value)){
             obj = VAR(std::get<f64>(value));
         }
-
         if(std::holds_alternative<Str>(value)){
             obj = VAR(std::get<Str>(value));
         }
-
-        if(!obj) UNREACHABLE();
+        if(obj == nullptr) UNREACHABLE();
         int index = ctx->add_const(obj);
         ctx->emit(OP_LOAD_CONST, index, line);
     }
+
+    bool is_literal() const override { return true; }
+};
+
+// PASS
+struct NegatedExpr: Expr{
+    Expr_ child;
+    NegatedExpr(Expr_&& child): child(std::move(child)) {}
+    Str str() const override { return "-"; }
+
+    std::vector<const Expr*> children() const override { return {child.get()}; }
+    // Emits code for `-child`: a folded constant for int/float literals, otherwise child + OP_UNARY_NEGATIVE.
+    void emit(CodeEmitContext* ctx) override {
+        VM* vm = ctx->vm;   // presumably needed by the VAR macro below -- confirm against its definition
+        // constant folding: an int or float literal is negated here and emitted as a single OP_LOAD_CONST
+        if(child->is_literal()){
+            LiteralExpr* lit = static_cast<LiteralExpr*>(child.get());
+            PyObject* obj = nullptr;
+            if(std::holds_alternative<i64>(lit->value)){
+                obj = VAR(-std::get<i64>(lit->value));
+            }
+            if(std::holds_alternative<f64>(lit->value)){
+                obj = VAR(-std::get<f64>(lit->value));
+            }
+            if(obj != nullptr){     // stays null for other literal kinds (e.g. Str): fall through to the generic path
+                ctx->emit(OP_LOAD_CONST, ctx->add_const(obj), line);
+                return;
+            }
+        }
+        child->emit(ctx);
+        ctx->emit(OP_UNARY_NEGATIVE, BC_NOARG, line);
+    }
+};
 
 // PASS
@@ -629,46 +645,4 @@ struct TernaryExpr: Expr{
 };
 
 
-} // namespace pkpy
-
-
-// struct TupleRef : BaseRef {
-//     Tuple objs;
-//     TupleRef(Tuple&& objs) : objs(std::move(objs)) {}
-
-//     PyObject* get(VM* vm, Frame* frame) const{
-//         Tuple args(objs.size());
-//         for (int i = 0; i < objs.size(); i++) {
-//             args[i] = vm->PyRef_AS_C(objs[i])->get(vm, frame);
-//         }
-//         return VAR(std::move(args));
-//     }
-
-//     void set(VM* vm, Frame* frame, PyObject* val) const{
-//         val = vm->asIter(val);
-//         BaseIter* iter = vm->PyIter_AS_C(val);
-//         for(int i=0; i<objs.size(); i++){
-//             PyObject* x;
-//             if(is_type(objs[i], vm->tp_star_wrapper)){
-//                 auto& star = _CAST(StarWrapper&, objs[i]);
-//                 if(star.rvalue) vm->ValueError("can't use starred expression here");
-//                 if(i != objs.size()-1) vm->ValueError("* can only be used at the end");
-//                 auto ref = vm->PyRef_AS_C(star.obj);
-//                 List list;
-//                 while((x = iter->next()) != nullptr) list.push_back(x);
-//                 ref->set(vm, frame, VAR(std::move(list)));
-//                 return;
-//             }else{
-//                 x = iter->next();
-//                 if(x == nullptr) vm->ValueError("not enough values to unpack");
-//                 vm->PyRef_AS_C(objs[i])->set(vm, frame, x);
-//             }
-//         }
-//         PyObject* x = iter->next();
-//         if(x != nullptr) vm->ValueError("too many values to unpack");
-//     }
-
-//     void del(VM* vm, Frame* frame) const{
-//         for(int i=0; i<objs.size(); i++) vm->PyRef_AS_C(objs[i])->del(vm, frame);
-//     }
-// };
+} // namespace pkpy

+ 22 - 19
src/iter.h

@@ -6,10 +6,10 @@ namespace pkpy{
 
 class RangeIter : public BaseIter {
     i64 current;
-    Range r;
+    Range r;    // copy by value, so we don't need to keep ref
 public:
-    RangeIter(VM* vm, PyObject* _ref) : BaseIter(vm, _ref) {
-        this->r = OBJ_GET(Range, _ref);
+    RangeIter(VM* vm, PyObject* ref) : BaseIter(vm) {
+        this->r = OBJ_GET(Range, ref);
         this->current = r.start;
     }
 
@@ -26,28 +26,37 @@ public:
 
 template <typename T>
 class ArrayIter : public BaseIter {
-    size_t index = 0;
-    const T* p;
+    int index;
+    PyObject* ref;
 public:
-    ArrayIter(VM* vm, PyObject* _ref) : BaseIter(vm, _ref) { p = &OBJ_GET(T, _ref);}
-    PyObject* next(){
+    ArrayIter(VM* vm, PyObject* ref) : BaseIter(vm), ref(ref), index(0) {}
+
+    PyObject* next() override{
+        const T* p = &OBJ_GET(T, ref);
         if(index == p->size()) return nullptr;
         return p->operator[](index++); 
     }
+
+    void _mark() override {
+        OBJ_MARK(ref);
+    }
 };
 
 class StringIter : public BaseIter {
     int index = 0;
-    Str* str;
+    PyObject* ref;
 public:
-    StringIter(VM* vm, PyObject* _ref) : BaseIter(vm, _ref) {
-        str = &OBJ_GET(Str, _ref);
-    }
+    StringIter(VM* vm, PyObject* ref) : BaseIter(vm), ref(ref) {}
 
-    PyObject* next() {
+    PyObject* next() override{
+        Str* str = &OBJ_GET(Str, ref);
         if(index == str->u8_length()) return nullptr;
         return VAR(str->u8_getitem(index++));
     }
+
+    void _mark() override {
+        OBJ_MARK(ref);
+    }
 };
 
 inline PyObject* Generator::next(){
@@ -58,20 +67,14 @@ inline PyObject* Generator::next(){
         frame = std::move(vm->callstack.top());
         vm->callstack.pop();
         state = 1;
-        return frame->pop_value(vm);
+        return frame->popx();
     }else{
         state = 2;
         return nullptr;
     }
 }
 
-inline void BaseIter::_mark() {
-    if(_ref != nullptr) OBJ_MARK(_ref);
-    if(loop_var != nullptr) OBJ_MARK(loop_var);
-}
-
 inline void Generator::_mark(){
-    BaseIter::_mark();
     if(frame!=nullptr) frame->_mark();
 }
 

+ 13 - 14
src/lexer.h

@@ -9,21 +9,23 @@ namespace pkpy{
 typedef uint8_t TokenIndex;
 
 constexpr const char* kTokens[] = {
+    "is not", "not in",
     "@eof", "@eol", "@sof",
-    ".", ",", ":", ";", "#", "(", ")", "[", "]", "{", "}", "%", "::",
-    "+", "-", "*", "/", "//", "**", "=", ">", "<", "...", "->",
-    "<<", ">>", "&", "|", "^", "?", "@",
-    "==", "!=", ">=", "<=",
-    "+=", "-=", "*=", "/=", "//=", "%=", "&=", "|=", "^=", ">>=", "<<=",
+    "@id", "@num", "@str", "@fstr",
+    "@indent", "@dedent",
+    /*****************************************/
+    "+", "+=", "-", "-=",   // (INPLACE_OP - 1) can get '=' removed
+    "*", "*=", "/", "/=", "//", "//=", "%", "%=",
+    "&", "&=", "|", "|=", "^", "^=", 
+    "<<", "<<=", ">>", ">>=",
+    /*****************************************/
+    ".", ",", ":", ";", "#", "(", ")", "[", "]", "{", "}", "::",
+    "**", "=", ">", "<", "...", "->", "?", "@", "==", "!=", ">=", "<=",
     /** KW_BEGIN **/
     "class", "import", "as", "def", "lambda", "pass", "del", "from", "with", "yield",
     "None", "in", "is", "and", "or", "not", "True", "False", "global", "try", "except", "finally",
     "goto", "label",      // extended keywords, not available in cpython
-    "while", "for", "if", "elif", "else", "break", "continue", "return", "assert", "raise",
-    /** KW_END **/
-    "is not", "not in",
-    "@id", "@num", "@str", "@fstr",
-    "@indent", "@dedent"
+    "while", "for", "if", "elif", "else", "break", "continue", "return", "assert", "raise"
 };
 
 using TokenValue = std::variant<std::monostate, i64, f64, Str>;
@@ -40,12 +42,9 @@ constexpr TokenIndex TK(const char token[]) {
 }
 
 #define TK_STR(t) kTokens[t]
-const TokenIndex kTokenKwBegin = TK("class");
-const TokenIndex kTokenKwEnd = TK("raise");
-
 const std::map<std::string_view, TokenIndex> kTokenKwMap = [](){
     std::map<std::string_view, TokenIndex> map;
-    for(int k=kTokenKwBegin; k<=kTokenKwEnd; k++) map[kTokens[k]] = k;
+    for(int k=TK("class"); k<kTokenCount; k++) map[kTokens[k]] = k;
     return map;
 }();
 

+ 2 - 4
src/obj.h

@@ -82,12 +82,10 @@ struct Slice {
 class BaseIter {
 protected:
     VM* vm;
-    PyObject* _ref;     // keep a reference to the object so it will not be deleted while iterating
 public:
+    BaseIter(VM* vm) : vm(vm) {}
+    virtual void _mark() {}
     virtual PyObject* next() = 0;
-    PyObject* loop_var;
-    BaseIter(VM* vm, PyObject* _ref) : vm(vm), _ref(_ref) {}
-    virtual void _mark();
     virtual ~BaseIter() = default;
 };
 

+ 73 - 81
src/vm.h

@@ -33,7 +33,7 @@ class Generator: public BaseIter {
     int state; // 0,1,2
 public:
     Generator(VM* vm, std::unique_ptr<Frame>&& frame)
-        : BaseIter(vm, nullptr), frame(std::move(frame)), state(0) {}
+        : BaseIter(vm), frame(std::move(frame)), state(0) {}
 
     PyObject* next() override;
     void _mark() override;
@@ -351,19 +351,11 @@ inline PyObject* NativeFunc::operator()(VM* vm, Args& args) const{
 }
 
 inline void CodeObject::optimize(VM* vm){
-    std::vector<StrName> keys;
-    for(auto& p: names) if(p.second == NAME_LOCAL) keys.push_back(p.first);
-    uint32_t base_n = (uint32_t)(keys.size() / kLocalsLoadFactor + 0.5);
+    // here we simply pass all names, but only some of them are NAME_LOCAL
+    // TODO: ...
+    uint32_t base_n = (uint32_t)(names.size() / kLocalsLoadFactor + 0.5);
     perfect_locals_capacity = find_next_capacity(base_n);
-    perfect_hash_seed = find_perfect_hash_seed(perfect_locals_capacity, keys);
-
-    for(int i=1; i<codes.size(); i++){
-        if(codes[i].op == OP_UNARY_NEGATIVE && codes[i-1].op == OP_LOAD_CONST){
-            codes[i].op = OP_NO_OP;
-            int pos = codes[i-1].arg;
-            consts[pos] = vm->num_negated(consts[pos]);
-        }
-    }
+    perfect_hash_seed = find_perfect_hash_seed(perfect_locals_capacity, names);
 
     // pre-compute sn in co_consts
     for(int i=0; i<consts.size(); i++){
@@ -559,73 +551,74 @@ inline PyObject* VM::new_module(StrName name) {
 }
 
 inline Str VM::disassemble(CodeObject_ co){
-    auto pad = [](const Str& s, const int n){
-        if(s.size() >= n) return s.substr(0, n);
-        return s + std::string(n - s.size(), ' ');
-    };
-
-    std::vector<int> jumpTargets;
-    for(auto byte : co->codes){
-        if(byte.op == OP_JUMP_ABSOLUTE || byte.op == OP_POP_JUMP_IF_FALSE){
-            jumpTargets.push_back(byte.arg);
-        }
-    }
-    StrStream ss;
-    ss << std::string(54, '-') << '\n';
-    ss << co->name << ":\n";
-    int prev_line = -1;
-    for(int i=0; i<co->codes.size(); i++){
-        const Bytecode& byte = co->codes[i];
-        if(byte.op == OP_NO_OP) continue;
-        Str line = std::to_string(byte.line);
-        if(byte.line == prev_line) line = "";
-        else{
-            if(prev_line != -1) ss << "\n";
-            prev_line = byte.line;
-        }
-
-        std::string pointer;
-        if(std::find(jumpTargets.begin(), jumpTargets.end(), i) != jumpTargets.end()){
-            pointer = "-> ";
-        }else{
-            pointer = "   ";
-        }
-        ss << pad(line, 8) << pointer << pad(std::to_string(i), 3);
-        ss << " " << pad(OP_NAMES[byte.op], 20) << " ";
-        // ss << pad(byte.arg == -1 ? "" : std::to_string(byte.arg), 5);
-        std::string argStr = byte.arg == -1 ? "" : std::to_string(byte.arg);
-        if(byte.op == OP_LOAD_CONST){
-            argStr += " (" + CAST(Str, asRepr(co->consts[byte.arg])) + ")";
-        }
-        if(byte.op == OP_LOAD_NAME_REF || byte.op == OP_LOAD_NAME || byte.op == OP_RAISE || byte.op == OP_STORE_NAME){
-            argStr += " (" + co->names[byte.arg].first.str().escape(true) + ")";
-        }
-        ss << argStr;
-        // ss << pad(argStr, 20);      // may overflow
-        // ss << co->blocks[byte.block].to_string();
-        if(i != co->codes.size() - 1) ss << '\n';
-    }
-    StrStream consts;
-    consts << "co_consts: ";
-    consts << CAST(Str, asRepr(VAR(co->consts)));
-
-    StrStream names;
-    names << "co_names: ";
-    List list;
-    for(int i=0; i<co->names.size(); i++){
-        list.push_back(VAR(co->names[i].first.str()));
-    }
-    names << CAST(Str, asRepr(VAR(list)));
-    ss << '\n' << consts.str() << '\n' << names.str() << '\n';
-
-    for(int i=0; i<co->consts.size(); i++){
-        PyObject* obj = co->consts[i];
-        if(is_type(obj, tp_function)){
-            const auto& f = CAST(Function&, obj);
-            ss << disassemble(f.code);
-        }
-    }
-    return Str(ss.str());
+    return "";
+    // auto pad = [](const Str& s, const int n){
+    //     if(s.size() >= n) return s.substr(0, n);
+    //     return s + std::string(n - s.size(), ' ');
+    // };
+
+    // std::vector<int> jumpTargets;
+    // for(auto byte : co->codes){
+    //     if(byte.op == OP_JUMP_ABSOLUTE || byte.op == OP_POP_JUMP_IF_FALSE){
+    //         jumpTargets.push_back(byte.arg);
+    //     }
+    // }
+    // StrStream ss;
+    // ss << std::string(54, '-') << '\n';
+    // ss << co->name << ":\n";
+    // int prev_line = -1;
+    // for(int i=0; i<co->codes.size(); i++){
+    //     const Bytecode& byte = co->codes[i];
+    //     if(byte.op == OP_NO_OP) continue;
+    //     Str line = std::to_string(byte.line);
+    //     if(byte.line == prev_line) line = "";
+    //     else{
+    //         if(prev_line != -1) ss << "\n";
+    //         prev_line = byte.line;
+    //     }
+
+    //     std::string pointer;
+    //     if(std::find(jumpTargets.begin(), jumpTargets.end(), i) != jumpTargets.end()){
+    //         pointer = "-> ";
+    //     }else{
+    //         pointer = "   ";
+    //     }
+    //     ss << pad(line, 8) << pointer << pad(std::to_string(i), 3);
+    //     ss << " " << pad(OP_NAMES[byte.op], 20) << " ";
+    //     // ss << pad(byte.arg == -1 ? "" : std::to_string(byte.arg), 5);
+    //     std::string argStr = byte.arg == -1 ? "" : std::to_string(byte.arg);
+    //     if(byte.op == OP_LOAD_CONST){
+    //         argStr += " (" + CAST(Str, asRepr(co->consts[byte.arg])) + ")";
+    //     }
+    //     if(byte.op == OP_LOAD_NAME_REF || byte.op == OP_LOAD_NAME || byte.op == OP_RAISE || byte.op == OP_STORE_NAME){
+    //         argStr += " (" + co->names[byte.arg].first.str().escape(true) + ")";
+    //     }
+    //     ss << argStr;
+    //     // ss << pad(argStr, 20);      // may overflow
+    //     // ss << co->blocks[byte.block].to_string();
+    //     if(i != co->codes.size() - 1) ss << '\n';
+    // }
+    // StrStream consts;
+    // consts << "co_consts: ";
+    // consts << CAST(Str, asRepr(VAR(co->consts)));
+
+    // StrStream names;
+    // names << "co_names: ";
+    // List list;
+    // for(int i=0; i<co->names.size(); i++){
+    //     list.push_back(VAR(co->names[i].first.str()));
+    // }
+    // names << CAST(Str, asRepr(VAR(list)));
+    // ss << '\n' << consts.str() << '\n' << names.str() << '\n';
+
+    // for(int i=0; i<co->consts.size(); i++){
+    //     PyObject* obj = co->consts[i];
+    //     if(is_type(obj, tp_function)){
+    //         const auto& f = CAST(Function&, obj);
+    //         ss << disassemble(f.code);
+    //     }
+    // }
+    // return Str(ss.str());
 }
 
 inline void VM::init_builtin_types(){
@@ -769,7 +762,6 @@ inline void VM::unpack_args(Args& args){
     for(int i=0; i<args.size(); i++){
         if(is_type(args[i], tp_star_wrapper)){
             auto& star = _CAST(StarWrapper&, args[i]);
-            if(!star.rvalue) UNREACHABLE();
             List& list = CAST(List&, asList(star.obj));
             unpacked.insert(unpacked.end(), list.begin(), list.end());
         }else{