BLUELOVETH 2 years ago
parent
commit
7324f897b5
8 changed files with 237 additions and 267 deletions
  1. 1 1
      amalgamate.py
  2. 2 0
      src/iter.h
  3. 0 1
      src/lexer.h
  4. 0 1
      src/memory.h
  5. 0 177
      src/new_str.h
  6. 26 26
      src/pocketpy.h
  7. 205 58
      src/str.h
  8. 3 3
      src/vm.h

+ 1 - 1
amalgamate.py

@@ -6,7 +6,7 @@ with open("src/opcodes.h", "rt", encoding='utf-8') as f:
 	OPCODES_TEXT = f.read()
 
 pipeline = [
-	["common.h", "vector.h", "memory.h", "str.h", "tuplelist.h", "namedict.h", "error.h", "lexer.h"],
+	["common.h", "memory.h", "vector.h", "str.h", "tuplelist.h", "namedict.h", "error.h", "lexer.h"],
 	["obj.h", "codeobject.h", "frame.h"],
 	["gc.h", "vm.h", "ceval.h", "expr.h", "compiler.h", "repl.h"],
 	["iter.h", "cffi.h", "io.h", "_generated.h", "pocketpy.h"]

+ 2 - 0
src/iter.h

@@ -49,6 +49,8 @@ public:
     StringIter(VM* vm, PyObject* ref) : BaseIter(vm), ref(ref), index(0) {}
 
     PyObject* next() override{
+        // TODO: optimize this to use an iterator;
+        // indexed access (u8_getitem) is O(n) per call
         Str* str = &OBJ_GET(Str, ref);
         if(index == str->u8_length()) return nullptr;
         return VAR(str->u8_getitem(index++));

+ 0 - 1
src/lexer.h

@@ -2,7 +2,6 @@
 
 #include "common.h"
 #include "error.h"
-#include "new_str.h"
 #include "str.h"
 
 namespace pkpy{

+ 0 - 1
src/memory.h

@@ -73,7 +73,6 @@ struct LinkedListNode{
     LinkedListNode* next;
 };
 
-
 template<typename T>
 struct DoubleLinkedList{
     static_assert(std::is_base_of_v<LinkedListNode, T>);

+ 0 - 177
src/new_str.h

@@ -1,177 +0,0 @@
-#pragma once
-
-#include "common.h"
-#include "memory.h"
-
-namespace pkpy{
-
-inline int utf8len(unsigned char c){
-    if((c & 0b10000000) == 0) return 1;
-    if((c & 0b11100000) == 0b11000000) return 2;
-    if((c & 0b11110000) == 0b11100000) return 3;
-    if((c & 0b11111000) == 0b11110000) return 4;
-    if((c & 0b11111100) == 0b11111000) return 5;
-    if((c & 0b11111110) == 0b11111100) return 6;
-    return 0;
-}
-
-struct String{
-    int size;
-    bool is_ascii;
-    char* data;
-
-    String(): size(0), is_ascii(true), data((char*)pool64.alloc(0)) {}
-
-    String(int size, bool is_ascii): size(size), is_ascii(is_ascii) {
-        data = (char*)pool64.alloc(size);
-    }
-
-    String(const char* str): size(strlen(str)), is_ascii(true) {
-        data = (char*)pool64.alloc(size);
-        for(int i=0; i<size; i++){
-            data[i] = str[i];
-            if(!isascii(str[i])) is_ascii = false;
-        }
-    }
-
-    String(const String& other): size(other.size), is_ascii(other.is_ascii) {
-        data = (char*)pool64.alloc(size);
-        memcpy(data, other.data, size);
-    }
-
-    String(String&& other): size(other.size), is_ascii(other.is_ascii), data(other.data) {
-        other.data = nullptr;
-        other.size = 0;
-    }
-
-    String& operator=(const String& other){
-        if(data!=nullptr) pool64.dealloc(data);
-        size = other.size;
-        is_ascii = other.is_ascii;
-        data = (char*)pool64.alloc(size);
-        memcpy(data, other.data, size);
-        return *this;
-    }
-
-    String& operator=(String&& other){
-        if(data!=nullptr) pool64.dealloc(data);
-        size = other.size;
-        is_ascii = other.is_ascii;
-        data = other.data;
-        other.data = nullptr;
-        return *this;
-    }
-
-    ~String(){
-        if(data!=nullptr) pool64.dealloc(data);
-    }
-
-    char operator[](int idx) const {
-        return data[idx];
-    }
-
-    int length() const {
-        return size;
-    }
-
-    String operator+(const String& other) const {
-        String ret(size + other.size, is_ascii && other.is_ascii);
-        memcpy(ret.data, data, size);
-        memcpy(ret.data + size, other.data, other.size);
-        return ret;
-    }
-
-    friend std::ostream& operator<<(std::ostream& os, const String& str){
-        os.write(str.data, str.size);
-        return os;
-    }
-
-    bool operator==(const String& other) const {
-        if(size != other.size) return false;
-        return memcmp(data, other.data, size) == 0;
-    }
-
-    bool operator!=(const String& other) const {
-        if(size != other.size) return true;
-        return memcmp(data, other.data, size) != 0;
-    }
-
-    bool operator<(const String& other) const {
-        int ret = strncmp(data, other.data, std::min(size, other.size));
-        if(ret != 0) return ret < 0;
-        return size < other.size;
-    }
-
-    bool operator>(const String& other) const {
-        int ret = strncmp(data, other.data, std::min(size, other.size));
-        if(ret != 0) return ret > 0;
-        return size > other.size;
-    }
-
-    bool operator<=(const String& other) const {
-        int ret = strncmp(data, other.data, std::min(size, other.size));
-        if(ret != 0) return ret < 0;
-        return size <= other.size;
-    }
-
-    bool operator>=(const String& other) const {
-        int ret = strncmp(data, other.data, std::min(size, other.size));
-        if(ret != 0) return ret > 0;
-        return size >= other.size;
-    }
-
-    String substr(int start, int len) const {
-        String ret(len, is_ascii);
-        memcpy(ret.data, data + start, len);
-        return ret;
-    }
-
-    char* dup_c_str() const {
-        char* p = (char*)malloc(size + 1);
-        memcpy(p, data, size);
-        p[size] = 0;
-        return p;
-    }
-
-    std::string_view view() const {
-        return std::string_view(data, size);
-    }
-
-    std::string str() const {
-        return std::string(data, size);
-    }
-
-    String lstrip() const {
-        std::string copy = str();
-        copy.erase(copy.begin(), std::find_if(copy.begin(), copy.end(), [](char c) {
-            // std::isspace(c) does not work on Windows (Debug)
-            return c != ' ' && c != '\t' && c != '\r' && c != '\n';
-        }));
-        return String(copy.c_str());
-    }
-
-    /*************unicode*************/
-
-    int _u8_index(int i) const{
-        if(is_ascii) return i;
-        int j = 0;
-        while(i > 0){
-            j += utf8len(data[j]);
-            i--;
-        }
-        return j;
-    }
-
-    String u8_getitem(int i) const {
-        i = _u8_index(i);
-        return substr(i, utf8len(data[i]));
-    }
-
-    String u8_slice(int start, int end) const{
-        start = _u8_index(start);
-        end = _u8_index(end);
-        return substr(start, end - start);
-    }
-};
-
-}   // namespace pkpy

+ 26 - 26
src/pocketpy.h

@@ -131,8 +131,8 @@ inline void init_builtins(VM* _vm) {
 
     _vm->bind_builtin_func<1>("ord", [](VM* vm, Args& args) {
         const Str& s = CAST(Str&, args[0]);
-        if (s.size() != 1) vm->TypeError("ord() expected an ASCII character");
-        return VAR((i64)(s.c_str()[0]));
+        if (s.length()!=1) vm->TypeError("ord() expected an ASCII character");
+        return VAR((i64)(s[0]));
     });
 
     _vm->bind_builtin_func<2>("hasattr", [](VM* vm, Args& args) {
@@ -237,8 +237,8 @@ inline void init_builtins(VM* _vm) {
             const Str& s = CAST(Str&, args[0]);
             try{
                 size_t parsed = 0;
-                i64 val = S_TO_INT(s, &parsed, 10);
-                if(parsed != s.size()) throw std::invalid_argument("<?>");
+                i64 val = S_TO_INT(s.str(), &parsed, 10);
+                if(parsed != s.length()) throw std::invalid_argument("<?>");
                 return VAR(val);
             }catch(std::invalid_argument&){
                 vm->ValueError("invalid literal for int(): " + s.escape(true));
@@ -284,7 +284,7 @@ inline void init_builtins(VM* _vm) {
             if(s == "inf") return VAR(INFINITY);
             if(s == "-inf") return VAR(-INFINITY);
             try{
-                f64 val = S_TO_FLOAT(s);
+                f64 val = S_TO_FLOAT(s.str());
                 return VAR(val);
             }catch(std::invalid_argument&){
                 vm->ValueError("invalid literal for float(): '" + s + "'");
@@ -327,7 +327,7 @@ inline void init_builtins(VM* _vm) {
     _vm->bind_method<1>("str", "__contains__", [](VM* vm, Args& args) {
         const Str& self = CAST(Str&, args[0]);
         const Str& other = CAST(Str&, args[1]);
-        return VAR(self.find(other) != Str::npos);
+        return VAR(self.index(other) != -1);
     });
 
     _vm->bind_method<0>("str", "__str__", CPP_LAMBDA(args[0]));
@@ -361,7 +361,7 @@ inline void init_builtins(VM* _vm) {
         if(is_type(args[1], vm->tp_slice)){
             Slice s = _CAST(Slice, args[1]);
             s.normalize(self.u8_length());
-            return VAR(self.u8_substr(s.start, s.stop));
+            return VAR(self.u8_slice(s.start, s.stop));
         }
 
         int index = CAST(int, args[1]);
@@ -382,28 +382,25 @@ inline void init_builtins(VM* _vm) {
     });
 
     _vm->bind_method<2>("str", "replace", [](VM* vm, Args& args) {
-        const Str& _self = CAST(Str&, args[0]);
-        const Str& _old = CAST(Str&, args[1]);
-        const Str& _new = CAST(Str&, args[2]);
-        Str _copy = _self;
-        size_t pos = 0;
-        while ((pos = _copy.find(_old, pos)) != std::string::npos) {
-            _copy.replace(pos, _old.length(), _new);
-            pos += _new.length();
-        }
-        return VAR(_copy);
+        const Str& self = CAST(Str&, args[0]);
+        const Str& old = CAST(Str&, args[1]);
+        const Str& new_ = CAST(Str&, args[2]);
+        return VAR(self.replace(old, new_));
     });
 
     _vm->bind_method<1>("str", "startswith", [](VM* vm, Args& args) {
         const Str& self = CAST(Str&, args[0]);
         const Str& prefix = CAST(Str&, args[1]);
-        return VAR(self.find(prefix) == 0);
+        return VAR(self.index(prefix) == 0);
     });
 
     _vm->bind_method<1>("str", "endswith", [](VM* vm, Args& args) {
         const Str& self = CAST(Str&, args[0]);
         const Str& suffix = CAST(Str&, args[1]);
-        return VAR(self.rfind(suffix) == self.length() - suffix.length());
+        int offset = self.length() - suffix.length();
+        if(offset < 0) return vm->False;
+        bool ok = memcmp(self.data+offset, suffix.data, suffix.length()) == 0;
+        return VAR(ok);
     });
 
     _vm->bind_method<1>("str", "join", [](VM* vm, Args& args) {
@@ -664,13 +661,15 @@ struct ReMatch {
     }
 };
 
-inline PyObject* _regex_search(const Str& pattern, const Str& string, bool fromStart, VM* vm){
+inline PyObject* _regex_search(const Str& _pattern, const Str& _string, bool fromStart, VM* vm){
+    std::string pattern = _pattern.str();
+    std::string string = _string.str();
     std::regex re(pattern);
     std::smatch m;
     if(std::regex_search(string, m, re)){
         if(fromStart && m.position() != 0) return vm->None;
-        i64 start = string._to_u8_index(m.position());
-        i64 end = string._to_u8_index(m.position() + m.length());
+        i64 start = _string._u8_index(m.position());
+        i64 end = _string._u8_index(m.position() + m.length());
         return VAR_T(ReMatch, start, end, m);
     }
     return vm->None;
@@ -695,14 +694,15 @@ inline void add_module_re(VM* vm){
     vm->bind_func<3>(mod, "sub", [](VM* vm, Args& args) {
         const Str& pattern = CAST(Str&, args[0]);
         const Str& repl = CAST(Str&, args[1]);
-        const Str& string = CAST(Str&, args[2]);
-        std::regex re(pattern);
+        const Str& _string = CAST(Str&, args[2]);
+        std::regex re(pattern.str());
+        std::string string = _string.str();
         return VAR(std::regex_replace(string, re, repl));
     });
 
     vm->bind_func<2>(mod, "split", [](VM* vm, Args& args) {
-        const Str& pattern = CAST(Str&, args[0]);
-        const Str& string = CAST(Str&, args[1]);
+        std::string pattern = CAST(Str&, args[0]).str();
+        std::string string = CAST(Str&, args[1]).str();
         std::regex re(pattern);
         std::sregex_token_iterator it(string.begin(), string.end(), re, -1);
         std::sregex_token_iterator end;

File diff suppressed because it is too large
+ 205 - 58
src/str.h


+ 3 - 3
src/vm.h

@@ -561,8 +561,8 @@ inline PyObject* VM::new_module(StrName name) {
 
 inline Str VM::disassemble(CodeObject_ co){
     auto pad = [](const Str& s, const int n){
-        if(s.size() >= n) return s.substr(0, n);
-        return s + std::string(n - s.size(), ' ');
+        if(s.length() >= n) return s.substr(0, n);
+        return s + std::string(n - s.length(), ' ');
     };
 
     std::vector<int> jumpTargets;
@@ -591,7 +591,7 @@ inline Str VM::disassemble(CodeObject_ co){
         ss << pad(line, 8) << pointer << pad(std::to_string(i), 3);
         ss << " " << pad(OP_NAMES[byte.op], 20) << " ";
         // ss << pad(byte.arg == -1 ? "" : std::to_string(byte.arg), 5);
-        Str argStr = byte.arg == -1 ? "" : std::to_string(byte.arg);
+        std::string argStr = byte.arg == -1 ? "" : std::to_string(byte.arg);
         switch(byte.op){
             case OP_LOAD_CONST:
                 argStr += " (" + CAST(Str, asRepr(co->consts[byte.arg])) + ")";

Some files were not shown because too many files changed in this diff