فهرست منبع

Make SourceData and SStream c11 (#266)

* Make SourceData c11

* make it compile

* add a macro

* follow up rename of c11_vector__push

* remove isascii macro

* fix empty function name handling

* change enum back

* Remove trivial accessor

* make pkpy_Str__take_buf hidden

* make it compile

* remove rcptr and use shared_ptr instead

* change enum name in C++ code back

* fix type problem

* remove strdup polyfill

* remove xmake

* ...
方而静 1 سال پیش
والد
کامیت
031f189a4a

+ 4 - 1
.gitignore

@@ -30,4 +30,7 @@ pocketpy.dSYM
 libpocketpy.dylib.dSYM/
 main.dSYM/
 
-docs/references.md
+docs/references.md
+
+.xmake
+.vs

+ 27 - 0
include/pocketpy/common/sstream.h

@@ -0,0 +1,27 @@
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "pocketpy/common/vector.h"
+#include "pocketpy/common/str.h"
+#include <stdint.h>
+
+typedef struct pkpy_SStream {  // character buffer that the pkpy_SStream__append_* functions add to
+    c11_vector data;  // elements are char (see pkpy_SStream__ctor); no NUL terminator is stored
+} pkpy_SStream;
+
+void pkpy_SStream__ctor(pkpy_SStream* self);  // set up `data` for char-sized elements
+void pkpy_SStream__dtor(pkpy_SStream* self);  // release `data`
+void pkpy_SStream__append_cstr(pkpy_SStream* self, const char* str);  // append chars up to, not including, the NUL
+void pkpy_SStream__append_cstrn(pkpy_SStream* self, const char* str, int n);  // append exactly n chars of str
+void pkpy_SStream__append_Str(pkpy_SStream* self, const pkpy_Str* str);  // append str's character data
+void pkpy_SStream__append_char(pkpy_SStream* self, char c);  // append the single char c
+void pkpy_SStream__append_int(pkpy_SStream* self, int i);  // append i formatted in decimal
+void pkpy_SStream__append_int64(pkpy_SStream* self, int64_t i);  // append i formatted in decimal
+pkpy_Str pkpy_SStream__to_Str(const pkpy_SStream* self);  // build a pkpy_Str from the buffered chars
+
+#ifdef __cplusplus
+}
+#endif

+ 1 - 1
include/pocketpy/compiler/compiler.hpp

@@ -41,7 +41,7 @@ struct Compiler {
 #if PK_DEBUG_COMPILER
         if(__i>=0 && __i<lexer.nexts.size()){
             printf("%s:%d %s %s\n",
-                lexer.src->filename.c_str(),
+                lexer.src.filename().c_str(),
                 curr().line,
                 TK_STR(curr().type),
                 curr().str().escape().c_str()

+ 32 - 0
include/pocketpy/objects/sourcedata.h

@@ -0,0 +1,32 @@
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdbool.h>
+#include "pocketpy/common/str.h"
+#include "pocketpy/common/vector.h"
+
+enum CompileMode { EXEC_MODE, EVAL_MODE, REPL_MODE, JSON_MODE, CELL_MODE };  // stored in pkpy_SourceData::mode
+
+struct pkpy_SourceData {
+    enum CompileMode mode;  // mode handed to pkpy_SourceData__ctor
+    bool is_precompiled;  // true when `source` begins with "pkpy:"
+
+    pkpy_Str filename;  // copy of the filename handed to pkpy_SourceData__ctor
+    pkpy_Str source;  // source text with a leading UTF-8 BOM skipped and every '\r' removed
+
+    c11_vector line_starts;     // contains "const char *"; entry 0 is the start of `source`
+    c11_vector _precompiled_tokens;  // contains "pkpy_Str"
+};
+
+void pkpy_SourceData__ctor(struct pkpy_SourceData *self, const char *source, int source_size, const pkpy_Str *filename, enum CompileMode mode);  // copies filename; stores source without BOM and '\r'
+void pkpy_SourceData__dtor(struct pkpy_SourceData* self);  // releases both strings and both vectors
+
+bool pkpy_SourceData__get_line(const struct pkpy_SourceData *self, int lineno, const char **st, const char **ed);  // false if precompiled or lineno == -1; otherwise [*st, *ed) spans the line, capped at 300 chars
+pkpy_Str pkpy_SourceData__snapshot(const struct pkpy_SourceData *self, int lineno, const char *cursor, const char *name);  // file/line header plus the source line and an optional '^' marker; name may be NULL
+
+#ifdef __cplusplus
+}
+#endif

+ 17 - 19
include/pocketpy/objects/sourcedata.hpp

@@ -2,28 +2,26 @@
 
 #include "pocketpy/common/utils.h"
 #include "pocketpy/common/str.hpp"
+#include "pocketpy/objects/sourcedata.h"
 
 namespace pkpy {
 
-enum CompileMode { EXEC_MODE, EVAL_MODE, REPL_MODE, JSON_MODE, CELL_MODE };
-
-struct SourceData {
-    PK_ALWAYS_PASS_BY_POINTER(SourceData)
-
-    Str filename;
-    CompileMode mode;
-
-    Str source;
-    vector<const char*> line_starts;
-
-    bool is_precompiled;
-    vector<Str> _precompiled_tokens;
-
-    SourceData(std::string_view source, const Str& filename, CompileMode mode);
-    SourceData(const Str& filename, CompileMode mode);
-    pair<const char*, const char*> _get_line(int lineno) const;
-    std::string_view get_line(int lineno) const;
-    Str snapshot(int lineno, const char* cursor, std::string_view name) const;
+// C++ owner for the C `pkpy_SourceData` record. Construction forwards to
+// pkpy_SourceData__ctor and destruction to pkpy_SourceData__dtor, so the
+// strings and vectors held by the base are released with the object.
+struct SourceData : public pkpy_SourceData {
+    SourceData(std::string_view source, const Str& filename, CompileMode mode) {
+        pkpy_SourceData__ctor(this, source.data(), source.size(), &filename, mode);
+    }
+
+    // Without this the filename, source and both vectors set up by the ctor leak.
+    ~SourceData() { pkpy_SourceData__dtor(this); }
+
+    // The base holds owning handles; a memberwise copy would release them twice.
+    SourceData(const SourceData&) = delete;
+    SourceData& operator=(const SourceData&) = delete;
+
+    // Text of line `lineno`, or "<?>" when pkpy_SourceData__get_line reports failure.
+    std::string_view get_line(int lineno) const {
+        const char *st, *ed;
+        if (pkpy_SourceData__get_line(this, lineno, &st, &ed)) {
+            return std::string_view(st, ed - st);
+        }
+        return "<?>";
+    }
+
+    // Formatted location report for `lineno`; an empty `name` omits the ", in <name>" part.
+    Str snapshot(int lineno, const char* cursor, std::string_view name) const {
+        if (name.empty()) {
+            return pkpy_SourceData__snapshot(this, lineno, cursor, nullptr);
+        }
+        // The C API reads `name` as a NUL-terminated string, which a string_view
+        // does not guarantee, so hand it a terminated copy.
+        Str name_z(name);
+        return pkpy_SourceData__snapshot(this, lineno, cursor, name_z.c_str());
+    }
+};
 
 }  // namespace pkpy

+ 95 - 0
src/common/sourcedata.c

@@ -0,0 +1,95 @@
+#include "pocketpy/objects/sourcedata.h"
+#include "pocketpy/common/sstream.h"
+#include <ctype.h>
+#include <stdlib.h>
+#include <string.h>
+
+void pkpy_Str__take_buf(pkpy_Str *self, char *data, int size);
+
+// Initialise `self` from the first `source_size` bytes of `source`.
+//
+// `filename` is copied. The stored source is a private copy with a leading
+// UTF-8 BOM skipped and every '\r' removed. `line_starts` is seeded with the
+// address of the first stored byte, and `is_precompiled` is set when the
+// stored source begins with "pkpy:".
+void pkpy_SourceData__ctor(struct pkpy_SourceData* self,
+                           const char* source,
+                           int source_size,
+                           const pkpy_Str* filename,
+                           enum CompileMode mode) {
+    self->filename = pkpy_Str__copy(filename);  // OPTIMIZEME?
+    self->mode = mode;
+
+    c11_vector__ctor(&self->line_starts, sizeof(const char*));
+    c11_vector__ctor(&self->_precompiled_tokens, sizeof(pkpy_Str));
+
+    // `source` need not be NUL-terminated (the C++ wrapper passes string_view
+    // data), so bound the BOM probe by `source_size` instead of relying on
+    // strncmp stopping at a terminator.
+    int index = (source_size >= 3 && memcmp(source, "\xEF\xBB\xBF", 3) == 0) ? 3 : 0;
+
+    // Size of the stored copy: everything after the BOM minus the '\r' bytes.
+    int len = source_size - index;
+    for(int i = index; i < source_size; ++i)
+        len -= (source[i] == '\r');
+
+    char* buf = malloc(len + 1);
+    if(buf == NULL) abort();  // out of memory: stop here rather than write through NULL
+    char* p = buf;
+    for(; index < source_size; ++index) {
+        if(source[index] != '\r') *(p++) = source[index];
+    }
+    buf[len] = '\0';
+    // Ownership of `buf` passes to self->source.
+    pkpy_Str__take_buf(&self->source, buf, len);
+
+    self->is_precompiled = (strncmp(pkpy_Str__data(&self->source), "pkpy:", 5) == 0);
+    c11_vector__push(const char*, &self->line_starts, pkpy_Str__data(&self->source));
+}
+
+// Release the two vectors and the two strings owned by `self`.
+void pkpy_SourceData__dtor(struct pkpy_SourceData* self) {
+    c11_vector__dtor(&self->_precompiled_tokens);
+    c11_vector__dtor(&self->line_starts);
+    pkpy_Str__dtor(&self->source);
+    pkpy_Str__dtor(&self->filename);
+}
+
+// Locate the text of line `lineno` (1-based; values below 1 other than -1 are
+// treated as line 1).
+//
+// On success, stores the half-open range [*st, *ed) and returns true. The
+// range ends at the first '\n' or '\0', or after 300 characters.
+// Returns false, leaving *st and *ed untouched, when the source is
+// precompiled, when `lineno` is -1, or when no start has been recorded for
+// that line.
+bool pkpy_SourceData__get_line(const struct pkpy_SourceData* self, int lineno, const char** st, const char** ed) {
+    if(self->is_precompiled || lineno == -1) { return false; }
+    lineno -= 1;
+    if(lineno < 0) lineno = 0;
+    // Only lines whose start has been pushed into line_starts can be found;
+    // indexing past `count` would read outside the vector.
+    if(lineno >= self->line_starts.count) { return false; }
+    const char* _start = c11__getitem(const char*, &self->line_starts, lineno);
+    const char* i = _start;
+    // max 300 chars
+    while(*i != '\n' && *i != '\0' && i - _start < 300)
+        i++;
+    *st = _start;
+    *ed = i;
+    return true;
+}
+
+// Build a location report of the form
+//     File "<filename>", line <lineno>[, in <name>]
+// (indented by two spaces). Unless the source is precompiled, this is followed
+// by the source line with leading blanks removed and indented by four spaces.
+// If `cursor` lies within that line, a third line places '^' under it. When
+// the line cannot be fetched or is blank, "    <?>" is emitted instead.
+//
+// `name` may be NULL to omit the ", in <name>" part; `cursor` may be NULL.
+pkpy_Str pkpy_SourceData__snapshot(const struct pkpy_SourceData* self, int lineno, const char* cursor, const char* name) {
+    pkpy_SStream ss;
+    pkpy_SStream__ctor(&ss);
+    pkpy_SStream__append_cstr(&ss, "  File \"");
+    pkpy_SStream__append_Str(&ss, &self->filename);
+    pkpy_SStream__append_cstr(&ss, "\", line ");
+    pkpy_SStream__append_int(&ss, lineno);
+
+    if(name) {
+        pkpy_SStream__append_cstr(&ss, ", in ");
+        pkpy_SStream__append_cstr(&ss, name);
+    }
+
+    if(!self->is_precompiled) {
+        pkpy_SStream__append_char(&ss, '\n');
+        // `st` stays NULL unless a non-blank line is found; that drives the "<?>" fallback below.
+        const char *st = NULL, *ed;
+        if(pkpy_SourceData__get_line(self, lineno, &st, &ed)) {
+            // <ctype.h> functions require a value representable as unsigned char;
+            // a plain (possibly negative) char from non-ASCII source is undefined.
+            while(st < ed && isblank((unsigned char)*st))
+                ++st;
+            if(st < ed) {
+                pkpy_SStream__append_cstr(&ss, "    ");
+                pkpy_SStream__append_cstrn(&ss, st, ed - st);
+                if(cursor && st <= cursor && cursor <= ed) {
+                    // The column is measured from the first non-blank character,
+                    // matching the stripped line printed above.
+                    pkpy_SStream__append_cstr(&ss, "\n    ");
+                    for(int i = 0; i < (cursor - st); ++i)
+                        pkpy_SStream__append_char(&ss, ' ');
+                    pkpy_SStream__append_cstr(&ss, "^");
+                }
+            } else {
+                st = NULL;
+            }
+        }
+
+        if(!st) { pkpy_SStream__append_cstr(&ss, "    <?>"); }
+    }
+    pkpy_Str res = pkpy_SStream__to_Str(&ss);
+    pkpy_SStream__dtor(&ss);
+    return res;
+}

+ 48 - 0
src/common/sstream.c

@@ -0,0 +1,48 @@
+#include "pocketpy/common/sstream.h"
+#include <stdio.h>
+
+void pkpy_SStream__ctor(pkpy_SStream* self) {  // prepare the stream for use
+    c11_vector__ctor(&self->data, sizeof(char));  // the buffer is a vector whose elements are single chars
+}
+
+void pkpy_SStream__dtor(pkpy_SStream* self) {  // release the stream's buffer
+    c11_vector__dtor(&self->data);
+}
+
+// Append each character of the NUL-terminated string `str`; the terminator is not stored.
+void pkpy_SStream__append_cstr(pkpy_SStream* self, const char* str) {
+    const char* cursor = str;
+    while (*cursor != '\0') {
+        c11_vector__push(char, &self->data, *cursor);
+        ++cursor;
+    }
+}
+
+// Append the first `n` characters of `str`; nothing is appended when `n` is zero or negative.
+void pkpy_SStream__append_cstrn(pkpy_SStream* self, const char* str, int n) {
+    const char* cursor = str;
+    int remaining = n;
+    while (remaining > 0) {
+        c11_vector__push(char, &self->data, *cursor);
+        ++cursor;
+        --remaining;
+    }
+}
+
+// Append the contents of `str`.
+// The length is taken from `str->size` rather than by scanning for a NUL, so a
+// string containing embedded '\0' bytes is appended in full.
+void pkpy_SStream__append_Str(pkpy_SStream* self, const pkpy_Str* str) {
+    pkpy_SStream__append_cstrn(self, pkpy_Str__data(str), str->size);
+}
+
+void pkpy_SStream__append_char(pkpy_SStream* self, char c) {  // append the single character `c`
+    c11_vector__push(char, &self->data, c);
+}
+
+// Append `i` formatted as a decimal number.
+void pkpy_SStream__append_int(pkpy_SStream* self, int i) {
+    char str[12]; // sign + 10 digits + null terminator (enough for a 32-bit int)
+    // Bounded write: the output cannot overrun `str` even if int is wider than 32 bits.
+    snprintf(str, sizeof(str), "%d", i);
+    pkpy_SStream__append_cstr(self, str);
+}
+
+// Append `i` formatted as a decimal number.
+void pkpy_SStream__append_int64(pkpy_SStream* self, int64_t i) {
+    char str[23]; // sign + 19 digits + null terminator needs 21 bytes; 23 leaves slack
+    // int64_t is `long` on some platforms, so cast to match the "%lld" conversion.
+    snprintf(str, sizeof(str), "%lld", (long long)i);
+    pkpy_SStream__append_cstr(self, str);
+}
+
+pkpy_Str pkpy_SStream__to_Str(const pkpy_SStream* self) {  // build a pkpy_Str from the buffered chars
+    pkpy_Str res;
+    pkpy_Str__ctor2(&res, self->data.data, self->data.count);  // ctor2 copies `count` bytes and adds the NUL terminator
+    return res;
+}

+ 27 - 7
src/common/str.c

@@ -22,6 +22,32 @@ void pkpy_Str__ctor(pkpy_Str *self, const char *data){
     pkpy_Str__ctor2(self, data, strlen(data));
 }
 
+// Scan the first self->size bytes of `p` and clear self->is_ascii at the first
+// non-ASCII byte. The flag is never set to true here; callers do that beforehand.
+static void pkpy_Str__check_ascii(pkpy_Str *self, char *p) {
+    int pos = 0;
+    while(pos < self->size){
+        if(!isascii(p[pos])){
+            self->is_ascii = false;
+            return;
+        }
+        pos++;
+    }
+}
+
+// Initialise `self` from the heap buffer `data` holding `size` characters.
+// A string short enough for the inline storage is copied there and `data` is
+// freed; otherwise `self` keeps the `data` pointer itself.
+void pkpy_Str__take_buf(pkpy_Str *self, char *data, int size) {
+    self->size = size;
+    self->is_ascii = true;
+    self->is_sso = size < sizeof(self->_inlined);
+    if(!self->is_sso){
+        // Long string: keep the caller's buffer as-is.
+        self->_ptr = data;
+        pkpy_Str__check_ascii(self, self->_ptr);
+        return;
+    }
+    // Short string: move the bytes inline and drop the heap buffer.
+    memcpy(self->_inlined, data, size);
+    self->_inlined[size] = '\0';
+    free(data);
+    pkpy_Str__check_ascii(self, self->_inlined);
+}
+
 void pkpy_Str__ctor2(pkpy_Str *self, const char *data, int size){
     self->size = size;
     self->is_ascii = true;
@@ -35,13 +61,7 @@ void pkpy_Str__ctor2(pkpy_Str *self, const char *data, int size){
     }
     memcpy(p, data, size);
     p[size] = '\0';
-    // check is_ascii
-    for(int i = 0; i < size; i++){
-        if(!isascii(p[i])){
-            self->is_ascii = false;
-            break;
-        }
-    }
+    pkpy_Str__check_ascii(self, p);
 }
 
 void pkpy_Str__dtor(pkpy_Str *self){

+ 3 - 3
src/compiler/compiler.cpp

@@ -20,7 +20,7 @@ NameScope Compiler::name_scope() const noexcept{
 }
 
 CodeObject_ Compiler::push_global_context() noexcept{
-    CodeObject_ co = std::make_shared<CodeObject>(lexer.src, lexer.src->filename);
+    CodeObject_ co = std::make_shared<CodeObject>(lexer.src, static_cast<const Str&>(lexer.src->filename));
     co->start_line = __i == 0 ? 1 : prev().line;
     contexts.push_back(CodeEmitContext(vm, co, contexts.size()));
     return co;
@@ -1293,8 +1293,8 @@ Error* Compiler::compile(CodeObject_* out) noexcept{
     Error* err;
     check(lexer.run());
 
-    // if(lexer.src->filename[0] != '<'){
-    //     printf("%s\n", lexer.src->filename.c_str());
+    // if(lexer.src.filename()[0] != '<'){
+    //     printf("%s\n", lexer.src.filename().c_str());
     //     for(int i=0; i<lexer.nexts.size(); i++){
     //         printf("%s: %s\n", TK_STR(tk(i).type), tk(i).str().escape().c_str());
     //     }

+ 8 - 8
src/compiler/lexer.cpp

@@ -99,7 +99,7 @@ char Lexer::eatchar_include_newline() noexcept{
     curr_char++;
     if(c == '\n') {
         current_line++;
-        src->line_starts.push_back(curr_char);
+        c11_vector__push(const char*, &src->line_starts, curr_char);
     }
     return c;
 }
@@ -534,8 +534,8 @@ Error* Lexer::SyntaxError(const char* fmt, ...) noexcept{
 }
 
 Lexer::Lexer(VM* vm, std::shared_ptr<SourceData> src) noexcept : vm(vm), src(src){
-    this->token_start = src->source.c_str();
-    this->curr_char = src->source.c_str();
+    this->token_start = pkpy_Str__data(&src->source);
+    this->curr_char = pkpy_Str__data(&src->source);
 }
 
 Error* Lexer::run() noexcept{
@@ -557,7 +557,7 @@ Error* Lexer::run() noexcept{
 }
 
 Error* Lexer::from_precompiled() noexcept{
-    TokenDeserializer deserializer(src->source.c_str());
+    TokenDeserializer deserializer(pkpy_Str__data(&src->source));
     deserializer.curr += 5;  // skip "pkpy:"
     std::string_view version = deserializer.read_string('\n');
 
@@ -569,9 +569,9 @@ Error* Lexer::from_precompiled() noexcept{
     }
 
     int count = deserializer.read_count();
-    vector<Str>& precompiled_tokens = src->_precompiled_tokens;
+    auto precompiled_tokens = &src->_precompiled_tokens;
     for(int i = 0; i < count; i++) {
-        precompiled_tokens.push_back(deserializer.read_string('\n'));
+        c11_vector__push(Str, precompiled_tokens, Str(deserializer.read_string('\n')));
     }
 
     count = deserializer.read_count();
@@ -580,8 +580,8 @@ Error* Lexer::from_precompiled() noexcept{
         t.type = (unsigned char)deserializer.read_uint(',');
         if(is_raw_string_used(t.type)) {
             i64 index = deserializer.read_uint(',');
-            t.start = precompiled_tokens[index].c_str();
-            t.length = precompiled_tokens[index].size;
+            t.start = c11__getitem(Str, precompiled_tokens, index).c_str();
+            t.length = c11__getitem(Str, precompiled_tokens, index).size;
         } else {
             t.start = nullptr;
             t.length = 0;

+ 2 - 2
src/interpreter/profiler.cpp

@@ -20,7 +20,7 @@ void LineProfiler::begin() { frames.clear(); }
 void LineProfiler::_step(int callstack_size, Frame* frame) {
     auto line_info = frame->co->lines[frame->ip()];
     if(line_info.is_virtual) return;
-    std::string_view filename = frame->co->src->filename.sv();
+    std::string_view filename = frame->co->src.filename().sv();
     int line = line_info.lineno;
 
     if(frames.empty()) {
@@ -87,7 +87,7 @@ Str LineProfiler::stats() {
         int start_line = decl->code->start_line;
         int end_line = decl->code->end_line;
         if(start_line == -1 || end_line == -1) continue;
-        std::string_view filename = decl->code->src->filename.sv();
+        std::string_view filename = decl->code->src.filename().sv();
         const _LineRecord* file_records = records[filename];
         clock_t total_time = 0;
         for(int line = start_line; line <= end_line; line++) {

+ 1 - 1
src/interpreter/vm.cpp

@@ -1706,7 +1706,7 @@ void VM::__breakpoint() {
                 SStream ss;
                 Frame* frame = &frames[i]->frame;
                 int lineno = frame->curr_lineno();
-                ss << "File \"" << frame->co->src->filename << "\", line " << lineno;
+                ss << "File \"" << frame->co->src.filename() << "\", line " << lineno;
                 if(frame->_callable) {
                     ss << ", in ";
                     ss << frame->_callable->as<Function>().decl->code->name;

+ 0 - 68
src/objects/sourcedata.cpp

@@ -1,68 +0,0 @@
-#include "pocketpy/objects/sourcedata.hpp"
-
-namespace pkpy {
-SourceData::SourceData(std::string_view source, const Str& filename, CompileMode mode) :
-    filename(filename), mode(mode) {
-    int index = 0;
-    // Skip utf8 BOM if there is any.
-    if(strncmp(source.data(), "\xEF\xBB\xBF", 3) == 0) index += 3;
-    // Drop all '\r'
-    SStream ss(source.size() + 1);
-    while(index < source.size()) {
-        if(source[index] != '\r') ss << source[index];
-        index++;
-    }
-    this->source = ss.str();
-    if(this->source.size > 5 && this->source.sv().substr(0, 5) == "pkpy:") {
-        this->is_precompiled = true;
-    } else {
-        this->is_precompiled = false;
-    }
-    line_starts.push_back(this->source.c_str());
-}
-
-SourceData::SourceData(const Str& filename, CompileMode mode) : filename(filename), mode(mode) {
-    line_starts.push_back(this->source.c_str());
-}
-
-pair<const char*, const char*> SourceData::_get_line(int lineno) const {
-    if(is_precompiled || lineno == -1) return {nullptr, nullptr};
-    lineno -= 1;
-    if(lineno < 0) lineno = 0;
-    const char* _start = line_starts[lineno];
-    const char* i = _start;
-    // max 300 chars
-    while(*i != '\n' && *i != '\0' && i - _start < 300)
-        i++;
-    return {_start, i};
-}
-
-std::string_view SourceData::get_line(int lineno) const {
-    auto [_0, _1] = _get_line(lineno);
-    if(_0 && _1) return std::string_view(_0, _1 - _0);
-    return "<?>";
-}
-
-Str SourceData::snapshot(int lineno, const char* cursor, std::string_view name) const {
-    SStream ss;
-    ss << "  " << "File \"" << filename << "\", line " << lineno;
-    if(!name.empty()) ss << ", in " << name;
-    if(!is_precompiled) {
-        ss << '\n';
-        pair<const char*, const char*> pair = _get_line(lineno);
-        Str line = "<?>";
-        int removed_spaces = 0;
-        if(pair.first && pair.second) {
-            line = Str(pair.first, pair.second - pair.first).lstrip();
-            removed_spaces = pair.second - pair.first - line.length();
-            if(line.empty()) line = "<?>";
-        }
-        ss << "    " << line;
-        if(cursor && line != "<?>" && cursor >= pair.first && cursor <= pair.second) {
-            auto column = cursor - pair.first - removed_spaces;
-            if(column >= 0) ss << "\n    " << std::string(column, ' ') << "^";
-        }
-    }
-    return ss.str();
-}
-}  // namespace pkpy