Parcourir la source

impl f-string

blueloveTH il y a 3 ans
Parent
commit
d4d312fc36
8 fichiers modifiés avec 125 ajouts et 32 suppressions
  1. 8 0
      src/codeobject.h
  2. 78 28
      src/compiler.h
  3. 4 2
      src/main.cpp
  4. 2 0
      src/opcodes.h
  5. 1 1
      src/parser.h
  6. 12 0
      src/pocketpy.h
  7. 1 1
      src/str.h
  8. 19 0
      src/vm.h

+ 8 - 0
src/codeobject.h

@@ -25,7 +25,15 @@ _Str pad(const _Str& s, const int n){
     return s + _Str(n - s.size(), ' ');
 }
 
+enum CompileMode {     // selects how a source text is compiled; stored on CodeObject::mode
+    EXEC_MODE,         // default mode: top-level statements are compiled until end of input
+    EVAL_MODE,         // a single expression (tuple) followed by end of input; the VM returns its value
+    SINGLE_MODE        // used by the REPL: OP_PRINT_EXPR is emitted for top-level expression statements
+};
+
 struct CodeObject {
+    CompileMode mode = EXEC_MODE;
+
     std::vector<ByteCode> co_code;
     _Str co_filename;
     _Str co_name;

+ 78 - 28
src/compiler.h

@@ -41,8 +41,6 @@ struct Loop {
 class Compiler {
 public:
     std::unique_ptr<Parser> parser;
-    bool repl_mode;
-
     std::stack<_Code> codes;
     std::stack<Loop> loops;
 
@@ -61,10 +59,13 @@ public:
         return loops.top();
     }
 
-    Compiler(VM* vm, const char* source, _Code code, bool repl_mode){
+    CompileMode mode() {    // compile mode recorded on the code object returned by getCode()
+        return getCode()->mode;
+    }
+
+    Compiler(VM* vm, const char* source, _Code code){
         this->vm = vm;
         this->codes.push(code);
-        this->repl_mode = repl_mode;
         if (!code->co_filename.empty()) path = code->co_filename;
         this->parser = std::make_unique<Parser>(source);
 
@@ -103,6 +104,7 @@ public:
         rules[TK("@id")] =      { METHOD(exprName),      NO_INFIX };
         rules[TK("@num")] =     { METHOD(exprLiteral),   NO_INFIX };
         rules[TK("@str")] =     { METHOD(exprLiteral),   NO_INFIX };
+        rules[TK("@fstr")] =    { METHOD(exprFString),   NO_INFIX };
         rules[TK("=")] =        { nullptr,               METHOD(exprAssign),         PREC_ASSIGNMENT };
         rules[TK("+=")] =       { nullptr,               METHOD(exprAssign),         PREC_ASSIGNMENT };
         rules[TK("-=")] =       { nullptr,               METHOD(exprAssign),         PREC_ASSIGNMENT };
@@ -118,9 +120,8 @@ public:
 #define EXPR_ANY() parsePrecedence(PREC_NONE)
     }
 
-    void eatString(bool single_quote) {
+    _Str eatStringUntil(char quote) {
         std::vector<char> buff;
-        char quote = (single_quote) ? '\'' : '"';
         while (true) {
             char c = parser->eatChar();
             if (c == quote) break;
@@ -134,16 +135,23 @@ public:
                     case 'n':  buff.push_back('\n'); break;
                     case 'r':  buff.push_back('\r'); break;
                     case 't':  buff.push_back('\t'); break;
-                    case '\n': break; // Just ignore the next line.
-                    case '\r': if (parser->matchChar('\n')) break;
-                    default: throw SyntaxError(path, parser->makeErrToken(), "invalid syntax");
+                    case '\n': case '\r': break;
+                    default: throw SyntaxError(path, parser->makeErrToken(), "invalid escape character");
                 }
             } else {
                 buff.push_back(c);
             }
         }
+        return _Str(buff.data(), buff.size());
+    }
 
-        parser->setNextToken(TK("@str"), vm->PyStr(_Str(buff.data(), buff.size())));
+    void eatString(char quote, bool fstr) {    // lex a string literal that ends at `quote`; `fstr` marks an f'...' / f"..." literal
+        _Str s = eatStringUntil(quote);        // literal body, with backslash escapes already decoded
+        if(fstr){
+            parser->setNextToken(TK("@fstr"), vm->PyStr(s));    // "@fstr" tokens are compiled by exprFString
+        }else{
+            parser->setNextToken(TK("@str"), vm->PyStr(s));     // "@str" tokens are compiled as plain literals
+        }
+    }
 
     void eatNumber() {
@@ -182,8 +190,7 @@ public:
             parser->token_start = parser->current_char;
             char c = parser->eatCharIncludeNewLine();
             switch (c) {
-                case '"': eatString(false); return;
-                case '\'': eatString(true); return;
+                case '\'': case '"': eatString(c, false); return;
                 case '#': parser->skipLineComment(); break;
                 case '{': parser->setNextToken(TK("{")); return;
                 case '}': parser->setNextToken(TK("}")); return;
@@ -232,6 +239,10 @@ public:
                     if (isdigit(c)) {
                         eatNumber();
                     } else if (isalpha(c) || c=='_') {
+                        if(c == 'f'){
+                            if(parser->matchChar('\'')) {eatString('\'', true); return;}
+                            if(parser->matchChar('"')) {eatString('"', true); return;}
+                        }
                         parser->eatName();
                     } else {
                         throw SyntaxError(path, parser->makeErrToken(), "unknown character: %c", c);
@@ -297,6 +308,35 @@ public:
         emitCode(OP_LOAD_CONST, index);
     }
 
+    void exprFString() {    // compile the previous "@fstr" token: push each piece, then join them with OP_BUILD_STRING
+        PyVar value = parser->previous.value;
+        std::string s = vm->PyStr_AS_C(value).str();
+        std::regex pattern(R"(\{(.*?)\})");    // non-greedy: captures the text between '{' and the next '}'
+        std::sregex_iterator begin(s.begin(), s.end(), pattern);
+        std::sregex_iterator end;
+        int size = 0;    // number of pieces emitted so far (operand of OP_BUILD_STRING)
+        int i = 0;       // offset in s just past the previous placeholder
+        for(auto it = begin; it != end; it++) {
+            std::smatch m = *it;
+            if (i < m.position()) {    // literal text between the previous placeholder and this one
+                std::string literal = s.substr(i, m.position() - i);
+                emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(literal)));
+                size++;
+            }
+            emitCode(OP_LOAD_EVAL_FN);    // placeholder: emit a call of eval on the captured text
+            emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(m[1].str())));    // the expression is stored as a string constant, not compiled here
+            emitCode(OP_CALL, 1);
+            size++;
+            i = m.position() + m.length();
+        }
+        if (i < s.size()) {    // trailing literal text after the last placeholder; NOTE(review): int vs size_t comparison
+            std::string literal = s.substr(i, s.size() - i);
+            emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(literal)));
+            size++;
+        }
+        emitCode(OP_BUILD_STRING, size);    // NOTE(review): the pattern has no handling for "{{"/"}}" escapes or a '}' inside the expression
+    }
+
     void exprLambda() {
         throw SyntaxError(path, parser->previous, "lambda is not implemented yet");
     }
@@ -493,7 +533,7 @@ public:
     
     void __compileBlockBody(CompilerAction action) {
         consume(TK(":"));
-        if(!matchNewLines(repl_mode)){
+        if(!matchNewLines(mode()==SINGLE_MODE)){
             throw SyntaxError(path, parser->previous, "expected a new line after ':'");
         }
         consume(TK("@indent"));
@@ -657,7 +697,7 @@ public:
             // If last op is not an assignment, pop the result.
             uint8_t lastOp = getCode()->co_code.back().op;
             if( lastOp != OP_STORE_NAME_PTR && lastOp != OP_STORE_PTR){
-                if(repl_mode && parser->indents.top() == 0){
+                if(mode()==SINGLE_MODE && parser->indents.top() == 0){
                     emitCode(OP_PRINT_EXPR);
                 }
                 emitCode(OP_POP_TOP);
@@ -713,6 +753,8 @@ public:
                 const _Str& name = parser->previous.str();
                 if(func.hasName(name)) throw SyntaxError(path, parser->previous, "duplicate argument name");
 
+                if(state == 0 && peek() == TK("=")) state = 2;
+
                 switch (state)
                 {
                     case 0: func.args.push_back(name); break;
@@ -740,7 +782,7 @@ public:
         if(match(TK("True"))) goto __LITERAL_EXIT;
         if(match(TK("False"))) goto __LITERAL_EXIT;
         if(match(TK("None"))) goto __LITERAL_EXIT;
-        throw SyntaxError(path, parser->previous, "expect a literal");
+        throw SyntaxError(path, parser->previous, "expect a literal, not %s", TK_STR(parser->current.type));
 __LITERAL_EXIT:
         return parser->previous.value;
     }
@@ -757,26 +799,34 @@ __LITERAL_EXIT:
         }
     }
 
-};
+    void __fillCode(){
+        // Lex initial tokens. current <-- next.
+        lexToken();
+        lexToken();
+        matchNewLines();
 
+        if(mode() == EVAL_MODE) {
+            EXPR_TUPLE();
+            consume(TK("@eof"));
+            return;
+        }
 
-_Code compile(VM* vm, const char* source, _Str filename, bool repl_mode=false) {
+        while (!match(TK("@eof"))) {
+            compileTopLevelStatement();
+            matchNewLines();
+        }
+    }
+};
+
+_Code compile(VM* vm, const char* source, _Str filename, CompileMode mode=EXEC_MODE) {
     // Skip utf8 BOM if there is any.
     if (strncmp(source, "\xEF\xBB\xBF", 3) == 0) source += 3;
 
     _Code code = std::make_shared<CodeObject>();
     code->co_filename = filename;
-    Compiler compiler(vm, source, code, repl_mode);
-
-    // Lex initial tokens. current <-- next.
-    compiler.lexToken();
-    compiler.lexToken();
-    compiler.matchNewLines();
-
-    while (!compiler.match(TK("@eof"))) {
-        compiler.compileTopLevelStatement();
-        compiler.matchNewLines();
-    }
+    code->mode = mode;
 
+    Compiler compiler(vm, source, code);
+    compiler.__fillCode();
     return code;
 }

+ 4 - 2
src/main.cpp

@@ -45,6 +45,7 @@ void REPL(){
     VM* vm = newVM();
 
     while(true){
+        CompileMode mode = SINGLE_MODE;
         vm->printFn(need_more_lines ? "... " : ">>> ");
         std::string line;
         std::getline(std::cin, line);
@@ -56,6 +57,7 @@ void REPL(){
             if(n>=2 && buffer[n-1]=='\n' && buffer[n-2]=='\n'){
                 need_more_lines = false;
                 line = buffer;
+                mode = EXEC_MODE;       // tmp set to EXEC_MODE
                 buffer.clear();
             }else{
                 continue;
@@ -65,7 +67,7 @@ void REPL(){
             if(line.empty()) continue;
         }
         try{
-            _Code code = compile(vm, line.c_str(), "<stdin>", true);
+            _Code code = compile(vm, line.c_str(), "<stdin>", mode);
             vm->exec(code);
 #ifdef PK_DEBUG
         }catch(NeedMoreLines& e){
@@ -106,7 +108,7 @@ int main(int argc, char** argv){
             std::string src((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
             VM* vm = newVM();
             Timer timer("编译时间");
-            _Code code = compile(vm, src.c_str(), filename, false);
+            _Code code = compile(vm, src.c_str(), filename);
             timer.stop();
             //std::cout << code->toString() << std::endl;
             Timer timer2("运行时间");

+ 2 - 0
src/opcodes.h

@@ -33,6 +33,7 @@ OPCODE(JUMP_IF_FALSE_OR_POP)
 OPCODE(LOAD_NONE)
 OPCODE(LOAD_TRUE)
 OPCODE(LOAD_FALSE)
+OPCODE(LOAD_EVAL_FN)       // load eval() callable into stack
 
 OPCODE(ASSERT)
 OPCODE(RAISE_ERROR)
@@ -48,5 +49,6 @@ OPCODE(STORE_PTR)           // no arg, [ptr, expr] -> *ptr = expr
 OPCODE(DELETE_PTR)          // no arg, [ptr] -> [] -> delete ptr
 
 OPCODE(BUILD_SMART_TUPLE)   // if all elements are pointers, build a compound pointer, otherwise build a tuple
+OPCODE(BUILD_STRING)        // arg is the expr count, build a string from the top of the stack
 
 #endif

+ 1 - 1
src/parser.h

@@ -20,7 +20,7 @@ constexpr const char* __TOKENS[] = {
     "while", "for", "if", "elif", "else", "break", "continue", "return", "assert", "raise",
     /** KW_END **/
     "is not", "not in",
-    "@id", "@num", "@str",
+    "@id", "@num", "@str", "@fstr",
     "@indent", "@dedent"
 };
 

+ 12 - 0
src/pocketpy.h

@@ -49,6 +49,14 @@ void __initializeBuiltinFunctions(VM* _vm) {
         return vm->None;
     });
 
+    _vm->bindBuiltinFunc("eval", [](VM* vm, PyVarList args) {    // eval(expr): compile one string argument in EVAL_MODE and execute it
+        if (args.size() != 1) vm->_error("TypeError", "eval() takes exactly one argument");
+        if (!args[0]->isType(vm->_tp_str)) vm->_error("TypeError", "eval() argument must be a string");
+        const _Str& expr = vm->PyStr_AS_C(args[0]);
+        _Code code = compile(vm, expr, "<f-string>", EVAL_MODE);    // NOTE(review): the filename is "<f-string>" for every eval() call, not only f-strings
+        return vm->exec(code);      // original author's note: not working in function
+    });
+
     _vm->bindBuiltinFunc("repr", [](VM* vm, PyVarList args) {
         return vm->asRepr(args.at(0));
     });
@@ -93,6 +101,10 @@ void __initializeBuiltinFunctions(VM* _vm) {
         return vm->PyStr(s);
     });
 
+    _vm->bindMethod("type", "__new__", [](VM* vm, PyVarList args) {
+        return args.at(1)->attribs["__class__"];
+    });
+
     _vm->bindMethod("range", "__new__", [](VM* vm, PyVarList args) {
         _Range r;
         if( args.size() == 0 ) vm->_error("TypeError", "range expected 1 arguments, got 0");

+ 1 - 1
src/str.h

@@ -3,10 +3,10 @@
 #include <vector>
 #include <string>
 #include <sstream>
+#include <regex>
 
 typedef std::stringstream _StrStream;
 
-
 class _Str {
 private:
     mutable bool utf8_initialized = false;

+ 19 - 0
src/vm.h

@@ -210,6 +210,16 @@ public:
                     pointers[i] = PyPointer_AS_C(items[i]);
                 frame->push(PyPointer(std::make_shared<CompoundPointer>(pointers)));
             } break;
+            case OP_BUILD_STRING:
+            {
+                PyVarList items = frame->popNValuesReversed(this, byte.arg);
+                _StrStream ss;
+                for(const auto& i : items) ss << PyStr_AS_C(asStr(i));
+                frame->push(PyStr(ss));
+            } break;
+            case OP_LOAD_EVAL_FN: {
+                frame->push(builtins->attribs["eval"]);
+            } break;
             case OP_STORE_FUNCTION:
                 {
                     PyVar obj = frame->popValue(this);
@@ -381,6 +391,15 @@ public:
                 break;
             }
         }
+
+        if(frame->code->mode == EVAL_MODE) {    // eval code: the expression's value must be the only entry left on the stack
+            if(frame->stackSize() != 1) {
+                _error("SystemError", "stack size is not 1 in EVAL_MODE");
+            }
+            return frame->popValue(this);    // NOTE(review): returns without callstack.pop(), unlike the path below; confirm the frame is not left on the callstack
+        }
+
+        if(frame->stackSize() != 0) _error("SystemError", "stack not empty in EXEC_MODE");
         callstack.pop();
         return None;
     }