Ver código fonte

reimpl `Str`

blueloveTH 2 anos atrás
pai
commit
c07ae35b8e
12 arquivos alterados com 220 adições e 220 exclusões
  1. + 8 - 8
      src/ceval.h
  2. + 10 - 10
      src/cffi.h
  3. + 7 - 7
      src/compiler.h
  4. + 12 - 11
      src/error.h
  5. + 37 - 39
      src/expr.h
  6. + 2 - 2
      src/frame.h
  7. + 4 - 4
      src/gc.h
  8. + 7 - 6
      src/lexer.h
  9. + 2 - 2
      src/namedict.h
  10. + 35 - 38
      src/pocketpy.h
  11. + 58 - 41
      src/str.h
  12. + 38 - 52
      src/vm.h

+ 8 - 8
src/ceval.h

@@ -33,7 +33,7 @@ __NEXT_STEP:;
     case OP_ROT_TWO: std::swap(frame->top(), frame->top_1()); DISPATCH();
     case OP_PRINT_EXPR: {
         PyObject* obj = frame->top();  // use top() to avoid accidental gc
-        if(obj != None) *_stdout << CAST(Str, asRepr(obj)) << '\n';
+        if(obj != None) *_stdout << CAST(Str&, asRepr(obj)) << '\n';
         frame->pop();
     } DISPATCH();
     /*****************************************/
@@ -168,7 +168,7 @@ __NEXT_STEP:;
         frame->push(VAR(std::move(items)));
     } DISPATCH();
     case OP_BUILD_STRING: {
-        StrStream ss;   // asStr() may run extra bytecode
+        std::stringstream ss;   // asStr() may run extra bytecode
         for(int i=byte.arg-1; i>=0; i--) ss << CAST(Str&, asStr(frame->top_n(i)));
         frame->pop_n(byte.arg);
         frame->push(VAR(ss.str()));
@@ -232,7 +232,7 @@ __NEXT_STEP:;
     case OP_GOTO: {
         StrName label = frame->co->names[byte.arg];
         auto it = frame->co->labels.find(label);
-        if(it == frame->co->labels.end()) _error("KeyError", "label " + label.str().escape(true) + " not found");
+        if(it == frame->co->labels.end()) _error("KeyError", fmt("label ", label.escape(), " not found"));
         frame->jump_abs_break(it->second);
     } DISPATCH();
     /*****************************************/
@@ -317,13 +317,13 @@ __NEXT_STEP:;
             auto it = _lazy_modules.find(name);
             if(it == _lazy_modules.end()){
                 bool ok = false;
-                source = _read_file_cwd(name.str() + ".py", &ok);
-                if(!ok) _error("ImportError", "module " + name.str().escape(true) + " not found");
+                source = _read_file_cwd(fmt(name, ".py"), &ok);
+                if(!ok) _error("ImportError", fmt("module ", name.escape(), " not found"));
             }else{
                 source = it->second;
                 _lazy_modules.erase(it);
             }
-            CodeObject_ code = compile(source, name.str(), EXEC_MODE);
+            CodeObject_ code = compile(source, name.sv(), EXEC_MODE);
             PyObject* new_mod = new_module(name);
             _exec(code, new_mod);
             new_mod->attr()._try_perfect_rehash();
@@ -335,7 +335,7 @@ __NEXT_STEP:;
     case OP_IMPORT_STAR: {
         PyObject* obj = frame->popx();
         for(auto& [name, value]: obj->attr().items()){
-            Str s = name.str();
+            std::string_view s = name.sv();
             if(s.empty() || s[0] == '_') continue;
             frame->f_globals().set(name, value);
         }
@@ -416,7 +416,7 @@ __NEXT_STEP:;
         _error(type, msg);
     } DISPATCH();
     case OP_RE_RAISE: _raise(); DISPATCH();
-    default: throw std::runtime_error(OP_NAMES[byte.op] + std::string(" is not implemented"));
+    default: throw std::runtime_error(fmt(OP_NAMES[byte.op], " is not implemented"));
     }
     UNREACHABLE();
 }

+ 10 - 10
src/cffi.h

@@ -131,14 +131,14 @@ struct TypeDB{
         return index == 0 ? nullptr : &_by_index[index-1];
     }
 
-    const TypeInfo* get(const char name[]) const {
+    const TypeInfo* get(std::string_view name) const {
         auto it = _by_name.find(name);
         if(it == _by_name.end()) return nullptr;
         return get(it->second);
     }
 
     const TypeInfo* get(const Str& s) const {
-        return get(s.c_str());
+        return get(s.sv());
     }
 
     template<typename T>
@@ -203,7 +203,7 @@ struct Pointer{
 
         vm->bind_method<0>(type, "__repr__", [](VM* vm, Args& args) {
             Pointer& self = CAST(Pointer&, args[0]);
-            StrStream ss;
+            std::stringstream ss;
             ss << "<" << self.ctype->name;
             for(int i=0; i<self.level; i++) ss << "*";
             ss << " at " << (i64)self.ptr << ">";
@@ -319,7 +319,7 @@ struct Pointer{
     Pointer _to(VM* vm, StrName name){
         auto it = ctype->members.find(name);
         if(it == ctype->members.end()){
-            vm->AttributeError(Str("struct '") + ctype->name + "' has no member " + name.str().escape(true));
+            vm->AttributeError(fmt("struct '", ctype->name, "' has no member ", name.escape()));
         }
         const MemberInfo& info = it->second;
         return {info.type, level, ptr+info.offset};
@@ -390,7 +390,7 @@ struct CType{
         vm->bind_static_method<1>(type, "__new__", [](VM* vm, Args& args) {
             const Str& name = CAST(Str&, args[0]);
             const TypeInfo* type = _type_db.get(name);
-            if(type == nullptr) vm->TypeError("unknown type: " + name.escape(true));
+            if(type == nullptr) vm->TypeError("unknown type: " + name.escape());
             return VAR_T(CType, type);
         });
 
@@ -432,22 +432,22 @@ inline void add_module_c(VM* vm){
         Pointer& self = CAST(Pointer&, args[0]);
         const Str& name = CAST(Str&, args[1]);
         int level = 0;
-        for(int i=name.size()-1; i>=0; i--){
+        for(int i=name.length()-1; i>=0; i--){
             if(name[i] == '*') level++;
             else break;
         }
         if(level == 0) vm->TypeError("expect a pointer type, such as 'int*'");
-        Str type_s = name.substr(0, name.size()-level);
+        Str type_s = name.substr(0, name.length()-level);
         const TypeInfo* type = _type_db.get(type_s);
-        if(type == nullptr) vm->TypeError("unknown type: " + type_s.escape(true));
+        if(type == nullptr) vm->TypeError("unknown type: " + type_s.escape());
         return VAR_T(Pointer, type, level, self.ptr);
     });
 
     vm->bind_func<1>(mod, "sizeof", [](VM* vm, Args& args) {
         const Str& name = CAST(Str&, args[0]);
-        if(name.find('*') != Str::npos) return VAR(sizeof(void*));
+        if(name.index("*") != -1) return VAR(sizeof(void*));
         const TypeInfo* type = _type_db.get(name);
-        if(type == nullptr) vm->TypeError("unknown type: " + name.escape(true));
+        if(type == nullptr) vm->TypeError("unknown type: " + name.escape());
         return VAR(type->size);
     });
 

+ 7 - 7
src/compiler.h

@@ -112,9 +112,9 @@ class Compiler {
 
     void consume(TokenIndex expected) {
         if (!match(expected)){
-            StrStream ss;
-            ss << "expected '" << TK_STR(expected) << "', but got '" << TK_STR(curr().type) << "'";
-            SyntaxError(ss.str());
+            SyntaxError(
+                fmt("expected '", TK_STR(expected), "', but got '", TK_STR(curr().type), "'")
+            );
         }
     }
 
@@ -190,7 +190,7 @@ class Compiler {
             _compile_f_args(e->decl, false);
             consume(TK(":"));
         }
-        e->decl->code = push_context(lexer->src, e->decl->name.str());
+        e->decl->code = push_context(lexer->src, e->decl->name.sv());
         EXPR(false); // https://github.com/blueloveTH/pocketpy/issues/37
         ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
         pop_context();
@@ -775,7 +775,7 @@ __SUBSCR_END:
                 if(mode()!=EXEC_MODE) SyntaxError("'label' is only available in EXEC_MODE");
                 consume(TK(".")); consume(TK("@id"));
                 bool ok = ctx()->add_label(prev().str());
-                if(!ok) SyntaxError("label " + prev().str().escape(true) + " already exists");
+                if(!ok) SyntaxError("label " + prev().str().escape() + " already exists");
                 consume_end_stmt();
             } break;
             case TK("goto"):
@@ -877,7 +877,7 @@ __SUBSCR_END:
         if(match(TK("->"))){
             if(!match(TK("None"))) consume(TK("@id"));
         }
-        decl->code = push_context(lexer->src, decl->name.str());
+        decl->code = push_context(lexer->src, decl->name.sv());
         compile_block_body();
         pop_context();
         ctx()->emit(OP_LOAD_FUNCTION, ctx()->add_func_decl(decl), prev().line);
@@ -928,7 +928,7 @@ __SUBSCR_END:
     void IndentationError(Str msg){ lexer->throw_err("IndentationError", msg, curr().line, curr().start); }
 
 public:
-    Compiler(VM* vm, const char* source, Str filename, CompileMode mode){
+    Compiler(VM* vm, const Str& source, const Str& filename, CompileMode mode){
         this->vm = vm;
         this->used = false;
         this->lexer = std::make_unique<Lexer>(

+ 12 - 11
src/error.h

@@ -38,14 +38,15 @@ struct SourceData {
         return {_start, i};
     }
 
-    SourceData(const char* source, Str filename, CompileMode mode) {
+    SourceData(const Str& source, const Str& filename, CompileMode mode) {
+        int index = 0;
         // Skip utf8 BOM if there is any.
-        if (strncmp(source, "\xEF\xBB\xBF", 3) == 0) source += 3;
+        if (strncmp(source.begin(), "\xEF\xBB\xBF", 3) == 0) index += 3;
         // Remove all '\r'
-        StrStream ss;
-        while(*source != '\0'){
-            if(*source != '\r') ss << *source;
-            source++;
+        std::stringstream ss;
+        while(index < source.length()){
+            if(source[index] != '\r') ss << source[index];
+            index++;
         }
 
         this->filename = filename;
@@ -55,14 +56,14 @@ struct SourceData {
     }
 
     Str snapshot(int lineno, const char* cursor=nullptr){
-        StrStream ss;
+        std::stringstream ss;
         ss << "  " << "File \"" << filename << "\", line " << lineno << '\n';
         std::pair<const char*,const char*> pair = get_line(lineno);
         Str line = "<?>";
         int removed_spaces = 0;
         if(pair.first && pair.second){
             line = Str(pair.first, pair.second-pair.first).lstrip();
-            removed_spaces = pair.second - pair.first - line.size();
+            removed_spaces = pair.second - pair.first - line.length();
             if(line.empty()) line = "<?>";
         }
         ss << "    " << line;
@@ -91,11 +92,11 @@ public:
 
     Str summary() const {
         StackTrace st(stacktrace);
-        StrStream ss;
+        std::stringstream ss;
         if(is_re) ss << "Traceback (most recent call last):\n";
         while(!st.empty()) { ss << st.top() << '\n'; st.pop(); }
-        if (!msg.empty()) ss << type.str() << ": " << msg;
-        else ss << type.str();
+        if (!msg.empty()) ss << type.sv() << ": " << msg;
+        else ss << type.sv();
         return ss.str();
     }
 };

+ 37 - 39
src/expr.h

@@ -15,7 +15,7 @@ struct Expr{
     int line = 0;
     virtual ~Expr() = default;
     virtual void emit(CodeEmitContext* ctx) = 0;
-    virtual Str str() const = 0;
+    virtual std::string str() const = 0;
 
     virtual bool is_starred() const { return false; }
     virtual bool is_literal() const { return false; }
@@ -23,7 +23,7 @@ struct Expr{
     virtual bool is_attrib() const { return false; }
 
     // for OP_DELETE_XXX
-    virtual bool emit_del(CodeEmitContext* ctx) { return false; }
+    [[nodiscard]] virtual bool emit_del(CodeEmitContext* ctx) { return false; }
 
     // for OP_STORE_XXX
     [[nodiscard]] virtual bool emit_store(CodeEmitContext* ctx) { return false; }
@@ -64,8 +64,8 @@ struct CodeEmitContext{
         expr->emit(this);
     }
 
-    Str _log_s_expr(){
-        StrStream ss;
+    std::string _log_s_expr(){
+        std::stringstream ss;
         for(auto& e: s_expr.data()) ss << e->str() << " ";
         return ss.str();
     }
@@ -118,7 +118,7 @@ struct NameExpr: Expr{
     NameScope scope;
     NameExpr(StrName name, NameScope scope): name(name), scope(scope) {}
 
-    Str str() const override { return "$" + name.str(); }
+    std::string str() const override { return fmt("Name(", name.escape(), ")"); }
 
     void emit(CodeEmitContext* ctx) override {
         int index = ctx->add_name(name);
@@ -161,7 +161,7 @@ struct NameExpr: Expr{
 struct StarredExpr: Expr{
     Expr_ child;
     StarredExpr(Expr_&& child): child(std::move(child)) {}
-    Str str() const override { return "*"; }
+    std::string str() const override { return "Starred()"; }
 
     bool is_starred() const override { return true; }
 
@@ -180,7 +180,7 @@ struct StarredExpr: Expr{
 struct NotExpr: Expr{
     Expr_ child;
     NotExpr(Expr_&& child): child(std::move(child)) {}
-    Str str() const override { return "not"; }
+    std::string str() const override { return "Not()"; }
 
     void emit(CodeEmitContext* ctx) override {
         child->emit(ctx);
@@ -192,7 +192,7 @@ struct NotExpr: Expr{
 struct AndExpr: Expr{
     Expr_ lhs;
     Expr_ rhs;
-    Str str() const override { return "and"; }
+    std::string str() const override { return "And()"; }
 
     void emit(CodeEmitContext* ctx) override {
         lhs->emit(ctx);
@@ -206,7 +206,7 @@ struct AndExpr: Expr{
 struct OrExpr: Expr{
     Expr_ lhs;
     Expr_ rhs;
-    Str str() const override { return "or"; }
+    std::string str() const override { return "Or()"; }
 
     void emit(CodeEmitContext* ctx) override {
         lhs->emit(ctx);
@@ -220,7 +220,7 @@ struct OrExpr: Expr{
 struct Literal0Expr: Expr{
     TokenIndex token;
     Literal0Expr(TokenIndex token): token(token) {}
-    Str str() const override { return TK_STR(token); }
+    std::string str() const override { return TK_STR(token); }
 
     void emit(CodeEmitContext* ctx) override {
         switch (token) {
@@ -239,7 +239,7 @@ struct Literal0Expr: Expr{
 struct LiteralExpr: Expr{
     TokenValue value;
     LiteralExpr(TokenValue value): value(value) {}
-    Str str() const override {
+    std::string str() const override {
         if(std::holds_alternative<i64>(value)){
             return std::to_string(std::get<i64>(value));
         }
@@ -249,7 +249,8 @@ struct LiteralExpr: Expr{
         }
 
         if(std::holds_alternative<Str>(value)){
-            return std::get<Str>(value).escape(true);
+            Str s = std::get<Str>(value).escape();
+            return s.str();
         }
 
         UNREACHABLE();
@@ -285,7 +286,7 @@ struct LiteralExpr: Expr{
 struct NegatedExpr: Expr{
     Expr_ child;
     NegatedExpr(Expr_&& child): child(std::move(child)) {}
-    Str str() const override { return "-"; }
+    std::string str() const override { return "Negated()"; }
 
     void emit(CodeEmitContext* ctx) override {
         VM* vm = ctx->vm;
@@ -318,7 +319,7 @@ struct SliceExpr: Expr{
     Expr_ start;
     Expr_ stop;
     Expr_ step;
-    Str str() const override { return "slice()"; }
+    std::string str() const override { return "Slice()"; }
 
     void emit(CodeEmitContext* ctx) override {
         if(start){
@@ -346,7 +347,7 @@ struct SliceExpr: Expr{
 struct DictItemExpr: Expr{
     Expr_ key;
     Expr_ value;
-    Str str() const override { return "k:v"; }
+    std::string str() const override { return "DictItem()"; }
 
     void emit(CodeEmitContext* ctx) override {
         value->emit(ctx);
@@ -368,7 +369,7 @@ struct SequenceExpr: Expr{
 
 struct ListExpr: SequenceExpr{
     using SequenceExpr::SequenceExpr;
-    Str str() const override { return "list()"; }
+    std::string str() const override { return "List()"; }
     Opcode opcode() const override { return OP_BUILD_LIST; }
 
     bool is_json_object() const override { return true; }
@@ -376,7 +377,7 @@ struct ListExpr: SequenceExpr{
 
 struct DictExpr: SequenceExpr{
     using SequenceExpr::SequenceExpr;
-    Str str() const override { return "dict()"; }
+    std::string str() const override { return "Dict()"; }
     Opcode opcode() const override { return OP_BUILD_DICT; }
 
     bool is_json_object() const override { return true; }
@@ -384,13 +385,13 @@ struct DictExpr: SequenceExpr{
 
 struct SetExpr: SequenceExpr{
     using SequenceExpr::SequenceExpr;
-    Str str() const override { return "set()"; }
+    std::string str() const override { return "Set()"; }
     Opcode opcode() const override { return OP_BUILD_SET; }
 };
 
 struct TupleExpr: SequenceExpr{
     using SequenceExpr::SequenceExpr;
-    Str str() const override { return "tuple()"; }
+    std::string str() const override { return "Tuple()"; }
     Opcode opcode() const override { return OP_BUILD_TUPLE; }
 
     bool emit_store(CodeEmitContext* ctx) override {
@@ -467,25 +468,25 @@ struct CompExpr: Expr{
 struct ListCompExpr: CompExpr{
     Opcode op0() override { return OP_BUILD_LIST; }
     Opcode op1() override { return OP_LIST_APPEND; }
-    Str str() const override { return "listcomp()"; }
+    std::string str() const override { return "ListComp()"; }
 };
 
 struct DictCompExpr: CompExpr{
     Opcode op0() override { return OP_BUILD_DICT; }
     Opcode op1() override { return OP_DICT_ADD; }
-    Str str() const override { return "dictcomp()"; }
+    std::string str() const override { return "DictComp()"; }
 };
 
 struct SetCompExpr: CompExpr{
     Opcode op0() override { return OP_BUILD_SET; }
     Opcode op1() override { return OP_SET_ADD; }
-    Str str() const override { return "setcomp()"; }
+    std::string str() const override { return "SetComp()"; }
 };
 
 struct LambdaExpr: Expr{
     FuncDecl_ decl;
     NameScope scope;
-    Str str() const override { return "<lambda>"; }
+    std::string str() const override { return "Lambda()"; }
 
     LambdaExpr(NameScope scope){
         this->decl = make_sp<FuncDecl>();
@@ -502,21 +503,21 @@ struct LambdaExpr: Expr{
 struct FStringExpr: Expr{
     Str src;
     FStringExpr(const Str& src): src(src) {}
-    Str str() const override {
-        return "f" + src.escape(true);
+    std::string str() const override {
+        return fmt("f", src.escape());
     }
 
     void emit(CodeEmitContext* ctx) override {
         VM* vm = ctx->vm;
         static const std::regex pattern(R"(\{(.*?)\})");
-        std::sregex_iterator begin(src.begin(), src.end(), pattern);
-        std::sregex_iterator end;
+        std::cregex_iterator begin(src.begin(), src.end(), pattern);
+        std::cregex_iterator end;
         int size = 0;
         int i = 0;
         for(auto it = begin; it != end; it++) {
-            std::smatch m = *it;
+            std::cmatch m = *it;
             if (i < m.position()) {
-                std::string literal = src.substr(i, m.position() - i);
+                Str literal = src.substr(i, m.position() - i);
                 ctx->emit(OP_LOAD_CONST, ctx->add_const(VAR(literal)), line);
                 size++;
             }
@@ -527,8 +528,8 @@ struct FStringExpr: Expr{
             size++;
             i = (int)(m.position() + m.length());
         }
-        if (i < src.size()) {
-            std::string literal = src.substr(i, src.size() - i);
+        if (i < src.length()) {
+            Str literal = src.substr(i, src.length() - i);
             ctx->emit(OP_LOAD_CONST, ctx->add_const(VAR(literal)), line);
             size++;
         }
@@ -539,7 +540,7 @@ struct FStringExpr: Expr{
 struct SubscrExpr: Expr{
     Expr_ a;
     Expr_ b;
-    Str str() const override { return "a[b]"; }
+    std::string str() const override { return "Subscr()"; }
 
     void emit(CodeEmitContext* ctx) override{
         a->emit(ctx);
@@ -567,7 +568,7 @@ struct AttribExpr: Expr{
     Str b;
     AttribExpr(Expr_ a, const Str& b): a(std::move(a)), b(b) {}
     AttribExpr(Expr_ a, Str&& b): a(std::move(a)), b(std::move(b)) {}
-    Str str() const override { return "a.b"; }
+    std::string str() const override { return "Attrib()"; }
 
     void emit(CodeEmitContext* ctx) override{
         a->emit(ctx);
@@ -603,7 +604,7 @@ struct CallExpr: Expr{
     Expr_ callable;
     std::vector<Expr_> args;
     std::vector<std::pair<Str, Expr_>> kwargs;
-    Str str() const override { return "call(...)"; }
+    std::string str() const override { return "Call()"; }
 
     bool need_unpack() const {
         for(auto& item: args) if(item->is_starred()) return true;
@@ -643,7 +644,7 @@ struct BinaryExpr: Expr{
     TokenIndex op;
     Expr_ lhs;
     Expr_ rhs;
-    Str str() const override { return TK_STR(op); }
+    std::string str() const override { return TK_STR(op); }
 
     void emit(CodeEmitContext* ctx) override {
         lhs->emit(ctx);
@@ -683,10 +684,7 @@ struct TernaryExpr: Expr{
     Expr_ cond;
     Expr_ true_expr;
     Expr_ false_expr;
-
-    Str str() const override {
-        return "cond ? t : f";
-    }
+    std::string str() const override { return "Ternary()"; }
 
     void emit(CodeEmitContext* ctx) override {
         cond->emit(ctx);

+ 2 - 2
src/frame.h

@@ -43,8 +43,8 @@ struct Frame {
         return co->src->snapshot(line);
     }
 
-    Str stack_info(){
-        StrStream ss;
+    std::string stack_info(){
+        std::stringstream ss;
         ss << id << " [";
         for(int i=0; i<_data.size(); i++){
             ss << (i64)_data[i];

+ 4 - 4
src/gc.h

@@ -37,7 +37,7 @@ struct ManagedHeap{
     template<typename T>
     PyObject* gcnew(Type type, T&& val){
         using __T = Py_<std::decay_t<T>>;
-        PyObject* obj = new(pool128.alloc<__T>()) __T(type, std::forward<T>(val));
+        PyObject* obj = new(pool64.alloc<__T>()) __T(type, std::forward<T>(val));
         gen.push_back(obj);
         gc_counter++;
         return obj;
@@ -46,7 +46,7 @@ struct ManagedHeap{
     template<typename T>
     PyObject* _new(Type type, T&& val){
         using __T = Py_<std::decay_t<T>>;
-        PyObject* obj = new(pool128.alloc<__T>()) __T(type, std::forward<T>(val));
+        PyObject* obj = new(pool64.alloc<__T>()) __T(type, std::forward<T>(val));
         obj->gc.enabled = false;
         _no_gc.push_back(obj);
         return obj;
@@ -57,7 +57,7 @@ struct ManagedHeap{
 #endif
 
     ~ManagedHeap(){
-        for(PyObject* obj: _no_gc) obj->~PyObject(), pool128.dealloc(obj);
+        for(PyObject* obj: _no_gc) obj->~PyObject(), pool64.dealloc(obj);
 #if DEBUG_GC_STATS
         for(auto& [type, count]: deleted){
             std::cout << "GC: " << obj_type_name(vm, type) << "=" << count << std::endl;
@@ -75,7 +75,7 @@ struct ManagedHeap{
 #if DEBUG_GC_STATS
                 deleted[obj->type] += 1;
 #endif
-                obj->~PyObject(), pool128.dealloc(obj);
+                obj->~PyObject(), pool64.dealloc(obj);
             }
         }
 

+ 7 - 6
src/lexer.h

@@ -57,12 +57,13 @@ struct Token{
   TokenValue value;
 
   Str str() const { return Str(start, length);}
+  std::string_view sv() const { return std::string_view(start, length);}
 
-  Str info() const {
-    StrStream ss;
-    Str raw = str();
-    if (raw == Str("\n")) raw = "\\n";
-    ss << line << ": " << TK_STR(type) << " '" << raw << "'";
+  std::string info() const {
+    std::stringstream ss;
+    ss << line << ": " << TK_STR(type) << " '" << (
+        sv()=="\n" ? "\\n" : sv()
+    ) << "'";
     return ss.str();
   }
 };
@@ -171,7 +172,7 @@ struct Lexer {
         curr_char--;
         while(true){
             unsigned char c = peekchar();
-            int u8bytes = utf8len(c);
+            int u8bytes = utf8len(c, true);
             if(u8bytes == 0) return 1;
             if(u8bytes == 1){
                 if(isalpha(c) || c=='_' || isdigit(c)) {

+ 2 - 2
src/namedict.h

@@ -87,7 +87,7 @@ while(!_items[i].first.empty()) {       \
     PyObject* operator[](StrName key) const {
         bool ok; uint16_t i;
         HASH_PROBE(key, ok, i);
-        if(!ok) throw std::out_of_range("NameDict key not found: " + key.str());
+        if(!ok) throw std::out_of_range(fmt("NameDict key not found: ", key));
         return _items[i].second;
     }
 
@@ -159,7 +159,7 @@ while(!_items[i].first.empty()) {       \
     void erase(StrName key){
         bool ok; uint16_t i;
         HASH_PROBE(key, ok, i);
-        if(!ok) throw std::out_of_range("NameDict key not found: " + key.str());
+        if(!ok) throw std::out_of_range(fmt("NameDict key not found: ", key));
         _items[i].first = StrName();
         _items[i].second = nullptr;
         _size--;

+ 35 - 38
src/pocketpy.h

@@ -12,7 +12,7 @@
 namespace pkpy {
 
 inline CodeObject_ VM::compile(Str source, Str filename, CompileMode mode) {
-    Compiler compiler(this, source.c_str(), filename, mode);
+    Compiler compiler(this, source, filename, mode);
     try{
         return compiler.compile();
     }catch(Exception& e){
@@ -71,7 +71,7 @@ inline void init_builtins(VM* _vm) {
         if(!vm->isinstance(args[1], type)){
             Str _0 = obj_type_name(vm, OBJ_GET(Type, vm->_t(args[1])));
             Str _1 = obj_type_name(vm, type);
-            vm->TypeError("super(): " + _0.escape(true) + " is not an instance of " + _1.escape(true));
+            vm->TypeError("super(): " + _0.escape() + " is not an instance of " + _1.escape());
         }
         Type base = vm->_all_types[type].base;
         return vm->heap.gcnew(vm->tp_super, Super(args[1], base));
@@ -150,7 +150,7 @@ inline void init_builtins(VM* _vm) {
     });
 
     _vm->bind_builtin_func<1>("hex", [](VM* vm, Args& args) {
-        StrStream ss;
+        std::stringstream ss;
         ss << std::hex << CAST(i64, args[0]);
         return VAR("0x" + ss.str());
     });
@@ -169,14 +169,14 @@ inline void init_builtins(VM* _vm) {
         std::vector<StrName> keys = t_attr.keys();
         names.insert(keys.begin(), keys.end());
         List ret;
-        for (StrName name : names) ret.push_back(VAR(name.str()));
+        for (StrName name : names) ret.push_back(VAR(name.sv()));
         return VAR(std::move(ret));
     });
 
     _vm->bind_method<0>("object", "__repr__", [](VM* vm, Args& args) {
         PyObject* self = args[0];
         if(is_tagged(self)) self = nullptr;
-        StrStream ss;
+        std::stringstream ss;
         ss << "<" << OBJ_NAME(vm->_t(self)) << " object at " << std::hex << self << ">";
         return VAR(ss.str());
     });
@@ -241,7 +241,7 @@ inline void init_builtins(VM* _vm) {
                 if(parsed != s.length()) throw std::invalid_argument("<?>");
                 return VAR(val);
             }catch(std::invalid_argument&){
-                vm->ValueError("invalid literal for int(): " + s.escape(true));
+                vm->ValueError("invalid literal for int(): " + s.escape());
             }
         }
         vm->TypeError("int() argument must be a int, float, bool or str");
@@ -297,7 +297,7 @@ inline void init_builtins(VM* _vm) {
     _vm->bind_method<0>("float", "__repr__", [](VM* vm, Args& args) {
         f64 val = CAST(f64, args[0]);
         if(std::isinf(val) || std::isnan(val)) return VAR(std::to_string(val));
-        StrStream ss;
+        std::stringstream ss;
         ss << std::setprecision(std::numeric_limits<f64>::max_digits10-1-2) << val;
         std::string s = ss.str();
         if(std::all_of(s.begin()+1, s.end(), isdigit)) s += ".0";
@@ -335,7 +335,7 @@ inline void init_builtins(VM* _vm) {
 
     _vm->bind_method<0>("str", "__repr__", [](VM* vm, Args& args) {
         const Str& _self = CAST(Str&, args[0]);
-        return VAR(_self.escape(true));
+        return VAR(_self.escape());
     });
 
     _vm->bind_method<0>("str", "__json__", [](VM* vm, Args& args) {
@@ -405,7 +405,7 @@ inline void init_builtins(VM* _vm) {
 
     _vm->bind_method<1>("str", "join", [](VM* vm, Args& args) {
         const Str& self = CAST(Str&, args[0]);
-        StrStream ss;
+        FastStrStream ss;
         PyObject* obj = vm->asList(args[1]);
         const List& list = CAST(List&, obj);
         for (int i = 0; i < list.size(); ++i) {
@@ -639,8 +639,8 @@ struct ReMatch {
 
     i64 start;
     i64 end;
-    std::smatch m;
-    ReMatch(i64 start, i64 end, std::smatch m) : start(start), end(end), m(m) {}
+    std::cmatch m;
+    ReMatch(i64 start, i64 end, std::cmatch m) : start(start), end(end), m(m) {}
 
     static void _register(VM* vm, PyObject* mod, PyObject* type){
         vm->bind_method<-1>(type, "__init__", CPP_NOT_IMPLEMENTED());
@@ -661,15 +661,13 @@ struct ReMatch {
     }
 };
 
-inline PyObject* _regex_search(const Str& _pattern, const Str& _string, bool fromStart, VM* vm){
-    std::string pattern = _pattern.str();
-    std::string string = _string.str();
-    std::regex re(pattern);
-    std::smatch m;
-    if(std::regex_search(string, m, re)){
-        if(fromStart && m.position() != 0) return vm->None;
-        i64 start = _string._u8_index(m.position());
-        i64 end = _string._u8_index(m.position() + m.length());
+inline PyObject* _regex_search(const Str& pattern, const Str& string, bool from_start, VM* vm){
+    std::regex re(pattern.begin(), pattern.end());
+    std::cmatch m;
+    if(std::regex_search(string.begin(), string.end(), m, re)){
+        if(from_start && m.position() != 0) return vm->None;
+        i64 start = string._byte_index_to_unicode(m.position());
+        i64 end = string._byte_index_to_unicode(m.position() + m.length());
         return VAR_T(ReMatch, start, end, m);
     }
     return vm->None;
@@ -694,18 +692,17 @@ inline void add_module_re(VM* vm){
     vm->bind_func<3>(mod, "sub", [](VM* vm, Args& args) {
         const Str& pattern = CAST(Str&, args[0]);
         const Str& repl = CAST(Str&, args[1]);
-        const Str& _string = CAST(Str&, args[2]);
-        std::regex re(pattern.str());
-        std::string string = _string.str();
-        return VAR(std::regex_replace(string, re, repl));
+        const Str& string = CAST(Str&, args[2]);
+        std::regex re(pattern.begin(), pattern.end());
+        return VAR(std::regex_replace(string.str(), re, repl.str()));
     });
 
     vm->bind_func<2>(mod, "split", [](VM* vm, Args& args) {
-        std::string pattern = CAST(Str&, args[0]).str();
-        std::string string = CAST(Str&, args[1]).str();
-        std::regex re(pattern);
-        std::sregex_token_iterator it(string.begin(), string.end(), re, -1);
-        std::sregex_token_iterator end;
+        const Str& pattern = CAST(Str&, args[0]);
+        const Str& string = CAST(Str&, args[1]);
+        std::regex re(pattern.begin(), pattern.end());
+        std::cregex_token_iterator it(string.begin(), string.end(), re, -1);
+        std::cregex_token_iterator end;
         List vec;
         for(; it != end; ++it){
             vec.push_back(VAR(it->str()));
@@ -863,8 +860,8 @@ extern "C" {
         pkpy::PyObject* val = vm->_main->attr().try_get(name);
         if(val == nullptr) return nullptr;
         try{
-            pkpy::Str repr = pkpy::CAST(pkpy::Str, vm->asRepr(val));
-            return strdup(repr.c_str());
+            pkpy::Str repr = pkpy::CAST(pkpy::Str&, vm->asRepr(val));
+            return repr.c_str_dup();
         }catch(...){
             return nullptr;
         }
@@ -879,8 +876,8 @@ extern "C" {
         pkpy::PyObject* ret = vm->exec(source, "<eval>", pkpy::EVAL_MODE);
         if(ret == nullptr) return nullptr;
         try{
-            pkpy::Str repr = pkpy::CAST(pkpy::Str, vm->asRepr(ret));
-            return strdup(repr.c_str());
+            pkpy::Str repr = pkpy::CAST(pkpy::Str&, vm->asRepr(ret));
+            return repr.c_str_dup();
         }catch(...){
             return nullptr;
         }
@@ -917,12 +914,12 @@ extern "C" {
     ///
     /// Return a json representing the result.
     char* pkpy_vm_read_output(pkpy::VM* vm){
-        if(vm->use_stdio) return nullptr;
-        pkpy::StrStream* s_out = (pkpy::StrStream*)(vm->_stdout);
-        pkpy::StrStream* s_err = (pkpy::StrStream*)(vm->_stderr);
+        if(vm->is_stdio_used()) return nullptr;
+        std::stringstream* s_out = (std::stringstream*)(vm->_stdout);
+        std::stringstream* s_err = (std::stringstream*)(vm->_stderr);
         pkpy::Str _stdout = s_out->str();
         pkpy::Str _stderr = s_err->str();
-        pkpy::StrStream ss;
+        std::stringstream ss;
         ss << '{' << "\"stdout\": " << _stdout.escape(false);
         ss << ", " << "\"stderr\": " << _stderr.escape(false) << '}';
         s_out->str(""); s_err->str("");
@@ -961,7 +958,7 @@ extern "C" {
         std::string f_header = std::string(mod) + '.' + name + '#' + std::to_string(kGlobalBindId++);
         pkpy::PyObject* obj = vm->_modules.contains(mod) ? vm->_modules[mod] : vm->new_module(mod);
         vm->bind_func<-1>(obj, name, [ret_code, f_header](pkpy::VM* vm, const pkpy::Args& args){
-            pkpy::StrStream ss;
+            std::stringstream ss;
             ss << f_header;
             for(int i=0; i<args.size(); i++){
                 ss << ' ';

Diferenças do arquivo suprimidas por serem muito extensas
+ 58 - 41
src/str.h


+ 38 - 52
src/vm.h

@@ -58,8 +58,8 @@ public:
 
     PyObject* run_frame(Frame* frame);
 
-    NameDict _modules;                          // loaded modules
-    std::map<StrName, Str> _lazy_modules;       // lazy loaded modules
+    NameDict _modules;                                  // loaded modules
+    std::map<StrName, Str> _lazy_modules;               // lazy loaded modules
 
     PyObject* _py_op_call;
     PyObject* _py_op_yield;
@@ -71,7 +71,8 @@ public:
     PyObject* builtins;         // builtins module
     PyObject* _main;            // __main__ module
 
-    bool use_stdio;
+    std::stringstream _stdout_buffer;
+    std::stringstream _stderr_buffer;
     std::ostream* _stdout;
     std::ostream* _stderr;
     int recursionlimit = 1000;
@@ -85,18 +86,13 @@ public:
 
     VM(bool use_stdio) : heap(this){
         this->vm = this;
-        this->use_stdio = use_stdio;
-        if(use_stdio){
-            this->_stdout = &std::cout;
-            this->_stderr = &std::cerr;
-        }else{
-            this->_stdout = new StrStream();
-            this->_stderr = new StrStream();
-        }
-
+        this->_stdout = use_stdio ? &std::cout : &_stdout_buffer;
+        this->_stderr = use_stdio ? &std::cerr : &_stderr_buffer;
         init_builtin_types();
     }
 
+    bool is_stdio_used() const { return _stdout == &std::cout; }
+
     Frame* top_frame() const {
 #if DEBUG_EXTRA_CHECK
         if(callstack.empty()) UNREACHABLE();
@@ -116,7 +112,7 @@ public:
         PyObject* self;
         PyObject* iter_f = get_unbound_method(obj, __iter__, &self, false);
         if(self != _py_null) return call(iter_f, Args{self});
-        TypeError(OBJ_NAME(_t(obj)).escape(true) + " object is not iterable");
+        TypeError(OBJ_NAME(_t(obj)).escape() + " object is not iterable");
         return nullptr;
     }
 
@@ -210,7 +206,7 @@ public:
         PyTypeInfo info{
             .obj = obj,
             .base = base,
-            .name = (mod!=nullptr && mod!=builtins) ? Str(OBJ_NAME(mod)+"."+name.str()): name.str()
+            .name = (mod!=nullptr && mod!=builtins) ? Str(OBJ_NAME(mod)+"."+name.sv()): name.sv()
         };
         if(mod != nullptr) mod->attr().set(name, obj);
         _all_types.push_back(info);
@@ -226,7 +222,7 @@ public:
         PyObject* obj = builtins->attr().try_get(type);
         if(obj == nullptr){
             for(auto& t: _all_types) if(t.name == type) return t.obj;
-            throw std::runtime_error("type not found: " + type);
+            throw std::runtime_error(fmt("type not found: ", type));
         }
         return obj;
     }
@@ -293,18 +289,18 @@ public:
     void ZeroDivisionError(){ _error("ZeroDivisionError", "division by zero"); }
     void IndexError(const Str& msg){ _error("IndexError", msg); }
     void ValueError(const Str& msg){ _error("ValueError", msg); }
-    void NameError(StrName name){ _error("NameError", "name " + name.str().escape(true) + " is not defined"); }
+    void NameError(StrName name){ _error("NameError", fmt("name ", name.escape() + " is not defined")); }
 
     void AttributeError(PyObject* obj, StrName name){
         // OBJ_NAME calls getattr, which may lead to a infinite recursion
-        _error("AttributeError", "type " +  OBJ_NAME(_t(obj)).escape(true) + " has no attribute " + name.str().escape(true));
+        _error("AttributeError", fmt("type ", OBJ_NAME(_t(obj)).escape(), " has no attribute ", name.escape()));
     }
 
     void AttributeError(Str msg){ _error("AttributeError", msg); }
 
     void check_type(PyObject* obj, Type type){
         if(is_type(obj, type)) return;
-        TypeError("expected " + OBJ_NAME(_t(type)).escape(true) + ", but got " + OBJ_NAME(_t(obj)).escape(true));
+        TypeError("expected " + OBJ_NAME(_t(type)).escape() + ", but got " + OBJ_NAME(_t(obj)).escape());
     }
 
     PyObject* _t(Type t){
@@ -317,13 +313,7 @@ public:
         return _all_types[OBJ_GET(Type, _t(obj->type)).index].obj;
     }
 
-    ~VM() {
-        heap.collect();
-        if(!use_stdio){
-            delete _stdout;
-            delete _stderr;
-        }
-    }
+    ~VM() { heap.collect(); }
 
     CodeObject_ compile(Str source, Str filename, CompileMode mode);
     PyObject* num_negated(PyObject* obj);
@@ -363,14 +353,6 @@ inline void CodeObject::optimize(VM* vm){
     uint32_t base_n = (uint32_t)(names.size() / kLocalsLoadFactor + 0.5);
     perfect_locals_capacity = find_next_capacity(base_n);
     perfect_hash_seed = find_perfect_hash_seed(perfect_locals_capacity, names);
-
-    // pre-compute sn in co_consts
-    for(int i=0; i<consts.size(); i++){
-        if(is_type(consts[i], vm->tp_str)){
-            Str& s = OBJ_GET(Str, consts[i]);
-            s._cached_sn_index = StrName::get(s.c_str()).index;
-        }
-    }
 }
 
 DEF_NATIVE_2(Str, tp_str)
@@ -482,6 +464,10 @@ inline PyObject* py_var(VM* vm, std::string val){
     return VAR(Str(std::move(val)));
 }
 
+inline PyObject* py_var(VM* vm, std::string_view val){
+    return VAR(Str(val));
+}
+
 template<typename T>
 void _check_py_class(VM* vm, PyObject* obj){
     vm->check_type(obj, T::_type(vm));
@@ -493,7 +479,7 @@ inline PyObject* VM::num_negated(PyObject* obj){
     }else if(is_float(obj)){
         return VAR(-CAST(f64, obj));
     }
-    TypeError("expected 'int' or 'float', got " + OBJ_NAME(_t(obj)).escape(true));
+    TypeError("expected 'int' or 'float', got " + OBJ_NAME(_t(obj)).escape());
     return nullptr;
 }
 
@@ -503,7 +489,7 @@ inline f64 VM::num_to_float(PyObject* obj){
     } else if (is_int(obj)){
         return (f64)CAST(i64, obj);
     }
-    TypeError("expected 'int' or 'float', got " + OBJ_NAME(_t(obj)).escape(true));
+    TypeError("expected 'int' or 'float', got " + OBJ_NAME(_t(obj)).escape());
     return 0;
 }
 
@@ -540,7 +526,7 @@ inline i64 VM::hash(PyObject* obj){
         f64 val = CAST(f64, obj);
         return (i64)std::hash<f64>()(val);
     }
-    TypeError("unhashable type: " +  OBJ_NAME(_t(obj)).escape(true));
+    TypeError("unhashable type: " +  OBJ_NAME(_t(obj)).escape());
     return 0;
 }
 
@@ -551,7 +537,7 @@ inline PyObject* VM::asRepr(PyObject* obj){
 
 inline PyObject* VM::new_module(StrName name) {
     PyObject* obj = heap._new<DummyModule>(tp_module, DummyModule());
-    obj->attr().set(__name__, VAR(name.str()));
+    obj->attr().set(__name__, VAR(name.sv()));
     // we do not allow override in order to avoid memory leak
     // it is because Module objects are not garbage collected
     if(_modules.contains(name)) UNREACHABLE();
@@ -571,7 +557,7 @@ inline Str VM::disassemble(CodeObject_ co){
             jumpTargets.push_back(byte.arg);
         }
     }
-    StrStream ss;
+    std::stringstream ss;
     int prev_line = -1;
     for(int i=0; i<co->codes.size(); i++){
         const Bytecode& byte = co->codes[i];
@@ -594,23 +580,23 @@ inline Str VM::disassemble(CodeObject_ co){
         std::string argStr = byte.arg == -1 ? "" : std::to_string(byte.arg);
         switch(byte.op){
             case OP_LOAD_CONST:
-                argStr += " (" + CAST(Str, asRepr(co->consts[byte.arg])) + ")";
+                argStr += fmt(" (", CAST(Str, asRepr(co->consts[byte.arg])), ")");
                 break;
             case OP_LOAD_NAME: case OP_LOAD_GLOBAL:
             case OP_STORE_LOCAL: case OP_STORE_GLOBAL:
             case OP_LOAD_ATTR: case OP_LOAD_METHOD: case OP_STORE_ATTR: case OP_DELETE_ATTR:
             case OP_IMPORT_NAME: case OP_BEGIN_CLASS:
             case OP_DELETE_LOCAL: case OP_DELETE_GLOBAL:
-                argStr += " (" + co->names[byte.arg].str() + ")";
+                argStr += fmt(" (", co->names[byte.arg].sv(), ")");
                 break;
             case OP_BINARY_OP:
-                argStr += " (" + BINARY_SPECIAL_METHODS[byte.arg].str() + ")";
+                argStr += fmt(" (", BINARY_SPECIAL_METHODS[byte.arg], ")");
                 break;
             case OP_COMPARE_OP:
-                argStr += " (" + COMPARE_SPECIAL_METHODS[byte.arg].str() + ")";
+                argStr += fmt(" (", COMPARE_SPECIAL_METHODS[byte.arg], ")");
                 break;
             case OP_BITWISE_OP:
-                argStr += " (" + BITWISE_SPECIAL_METHODS[byte.arg].str() + ")";
+                argStr += fmt(" (", BITWISE_SPECIAL_METHODS[byte.arg], ")");
                 break;
         }
         ss << pad(argStr, 40);      // may overflow
@@ -619,21 +605,21 @@ inline Str VM::disassemble(CodeObject_ co){
     }
 
 #if !DEBUG_DIS_EXEC_MIN
-    StrStream consts;
+    std::stringstream consts;
     consts << "co_consts: ";
-    consts << CAST(Str, asRepr(VAR(co->consts)));
+    consts << CAST(Str&, asRepr(VAR(co->consts)));
 
-    StrStream names;
+    std::stringstream names;
     names << "co_names: ";
     List list;
     for(int i=0; i<co->names.size(); i++){
-        list.push_back(VAR(co->names[i].str()));
+        list.push_back(VAR(co->names[i].sv()));
     }
     names << CAST(Str, asRepr(VAR(list)));
     ss << '\n' << consts.str() << '\n' << names.str();
 #endif
     for(auto& decl: co->func_decls){
-        ss << "\n\n" << "Disassembly of " << decl->name.str() << ":\n";
+        ss << "\n\n" << "Disassembly of " << decl->name << ":\n";
         ss << disassemble(decl->code);
     }
     return Str(ss.str());
@@ -733,7 +719,7 @@ inline PyObject* VM::call(PyObject* callable, Args args, const Args& kwargs, boo
                 locals->set(name, args[i++]);
                 continue;
             }
-            TypeError("missing positional argument " + name.str().escape(true));
+            TypeError(fmt("missing positional argument ", name.escape()));
         }
 
         locals->update(fn.decl->kwargs);
@@ -756,7 +742,7 @@ inline PyObject* VM::call(PyObject* callable, Args args, const Args& kwargs, boo
         for(int i=0; i<kwargs.size(); i+=2){
             const Str& key = CAST(Str&, kwargs[i]);
             if(!fn.decl->kwargs.contains(key)){
-                TypeError(key.escape(true) + " is an invalid keyword argument for " + fn.decl->name.str() + "()");
+                TypeError(fmt(key.escape(), " is an invalid keyword argument for ", fn.decl->name, "()"));
             }
             locals->set(key, kwargs[i+1]);
         }
@@ -774,7 +760,7 @@ inline PyObject* VM::call(PyObject* callable, Args args, const Args& kwargs, boo
         args.extend_self(self);
         return call(call_f, std::move(args), kwargs, false);
     }
-    TypeError(OBJ_NAME(_t(callable)).escape(true) + " object is not callable");
+    TypeError(OBJ_NAME(_t(callable)).escape() + " object is not callable");
     return None;
 }
 
@@ -880,7 +866,7 @@ inline void VM::setattr(PyObject* obj, StrName name, T&& value){
             if(descr_set != nullptr){
                 call(descr_set, Args{cls_var, obj, std::forward<T>(value)});
             }else{
-                TypeError("readonly attribute: " + name.str().escape(true));
+                TypeError(fmt("readonly attribute: ", name.escape()));
             }
             return;
         }

Alguns arquivos não foram mostrados porque muitos arquivos mudaram nesse diff