blueloveTH 2 лет назад
Родитель
Commit
2e46449117
3 измененных файлов с 56 добавлено и 27 удалено
  1. 0 2
      src/ceval.h
  2. 4 7
      src/lexer.h
  3. 52 18
      src/new_str.h

+ 0 - 2
src/ceval.h

@@ -277,8 +277,6 @@ __NEXT_STEP:;
     } DISPATCH();
     case OP_DICT_ADD: {
         PyObject* kv = frame->popx();
-        // we do copy here to avoid accidental gc in `kv`
-        // TODO: optimize to avoid copy
         Tuple& t = CAST(Tuple& ,kv);
         fast_call(__setitem__, Args{frame->top_1(), t[0], t[1]});
     } DISPATCH();

+ 4 - 7
src/lexer.h

@@ -2,6 +2,7 @@
 
 #include "common.h"
 #include "error.h"
+#include "new_str.h"
 #include "str.h"
 
 namespace pkpy{
@@ -170,13 +171,9 @@ struct Lexer {
     int eat_name() {
         curr_char--;
         while(true){
-            uint8_t c = peekchar();
-            int u8bytes = 0;
-            if((c & 0b10000000) == 0b00000000) u8bytes = 1;
-            else if((c & 0b11100000) == 0b11000000) u8bytes = 2;
-            else if((c & 0b11110000) == 0b11100000) u8bytes = 3;
-            else if((c & 0b11111000) == 0b11110000) u8bytes = 4;
-            else return 1;
+            unsigned char c = peekchar();
+            int u8bytes = utf8len(c);
+            if(u8bytes == 0) return 1;
             if(u8bytes == 1){
                 if(isalpha(c) || c=='_' || isdigit(c)) {
                     curr_char++;

+ 52 - 18
src/new_str.h

@@ -2,33 +2,52 @@
 
 #include "common.h"
 #include "memory.h"
-#include <string_view>
 
 namespace pkpy{
 
+// Byte length of the UTF-8 sequence introduced by lead byte `c`.
+// Recognizes the 1- through 6-byte lead patterns; any other byte
+// (a 10xxxxxx continuation byte, 0xFE or 0xFF) yields 0.
+inline int utf8len(unsigned char c){
+    // masks[k] selects the prefix bits of a (k+1)-byte lead; leads[k] is the value they must have.
+    static const unsigned char masks[6] = {0x80, 0xE0, 0xF0, 0xF8, 0xFC, 0xFE};
+    static const unsigned char leads[6] = {0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC};
+    for(int k=0; k<6; k++){
+        if((c & masks[k]) == leads[k]) return k + 1;
+    }
+    return 0;
+}
+
 struct String{
-    char* data;
     int size;
+    bool is_ascii;
+    char* data;
 
-    String(): data((char*)pool64.alloc(0)), size(0) {}
-    String(int size): data((char*)pool64.alloc(size)), size(size) {}
-    String(const char* str) {
-        size = strlen(str);
+    String(): size(0), is_ascii(true), data((char*)pool64.alloc(0)) {}
+
+    String(int size, bool is_ascii): size(size), is_ascii(is_ascii) {
         data = (char*)pool64.alloc(size);
-        memcpy(data, str, size);
     }
 
-    String(const String& other): data((char*)pool64.alloc(other.size)), size(other.size) {
+    // Builds a String from a NUL-terminated C string: copies strlen(str) bytes
+    // (terminator excluded) into a buffer obtained from pool64, then clears
+    // is_ascii if any copied byte fails isascii().
+    String(const char* str): size(strlen(str)), is_ascii(true) {
+        data = (char*)pool64.alloc(size);
+        memcpy(data, str, size);
+        // The flag only ever goes from true to false, so stop at the first hit.
+        for(const char* p = str; p != str + size; ++p){
+            if(!isascii(*p)){
+                is_ascii = false;
+                break;
+            }
+        }
+    }
+
+    String(const String& other): size(other.size), is_ascii(other.is_ascii) {
+        data = (char*)pool64.alloc(size);
         memcpy(data, other.data, size);
     }
 
-    String(String&& other): data(other.data), size(other.size) {
+    String(String&& other): size(other.size), is_ascii(other.is_ascii), data(other.data) {
         other.data = nullptr;
+        other.size = 0;
     }
 
     String& operator=(const String& other){
         if(data!=nullptr) pool64.dealloc(data);
         size = other.size;
+        is_ascii = other.is_ascii;
         data = (char*)pool64.alloc(size);
         memcpy(data, other.data, size);
         return *this;
@@ -37,6 +56,7 @@ struct String{
     String& operator=(String&& other){
         if(data!=nullptr) pool64.dealloc(data);
         size = other.size;
+        is_ascii = other.is_ascii;
         data = other.data;
         other.data = nullptr;
         return *this;
@@ -55,7 +75,7 @@ struct String{
     }
 
     String operator+(const String& other) const {
-        String ret(size + other.size);
+        String ret(size + other.size, is_ascii && other.is_ascii);
         memcpy(ret.data, data, size);
         memcpy(ret.data + size, other.data, other.size);
         return ret;
@@ -101,15 +121,11 @@ struct String{
     }
 
     String substr(int start, int len) const {
-        String ret(len);
+        String ret(len, is_ascii);
         memcpy(ret.data, data + start, len);
         return ret;
     }
 
-    String substr(int start) const {
-        return substr(start, size - start);
-    }
-
     char* dup_c_str() const {
         char* p = (char*)malloc(size + 1);
         memcpy(p, data, size);
@@ -133,11 +149,29 @@ struct String{
         }));
         return String(copy.c_str());
     }
-};
 
-struct UnicodeString: String{
+    /*************unicode*************/
 
-};
+    // Maps code-point index `i` to the byte offset of that code point in `data`.
+    // When is_ascii is set the index is returned unchanged. Otherwise the buffer
+    // is walked one sequence at a time; the walk never moves past `size`, and a
+    // byte that is not a valid lead (utf8len() == 0) is stepped over as a single
+    // byte so the offset always makes progress. The result is at most `size`.
+    int _u8_index(int i) const{
+        if(is_ascii) return i;
+        int j = 0;
+        while(i > 0 && j < size){
+            int n = utf8len((unsigned char)data[j]);
+            j += (n > 0) ? n : 1;
+            i--;
+        }
+        return (j < size) ? j : size;
+    }
 
+    // Returns the code point at code-point index `i` as a new String.
+    // The byte length is taken from the lead byte but clamped to the bytes that
+    // remain in the buffer, so a truncated trailing sequence cannot make substr
+    // read past `data + size`. An offset outside [0, size) yields an empty String.
+    String u8_getitem(int i) const {
+        i = _u8_index(i);
+        if(i < 0 || i >= size) return String();
+        int n = utf8len((unsigned char)data[i]);
+        if(n == 0) n = 1;               // not a lead byte: hand back that single byte
+        if(n > size - i) n = size - i;  // truncated sequence: stay inside the buffer
+        return substr(i, n);
+    }
+
+    // Returns the code points in [start, end) as a new String: both code-point
+    // indices are translated to byte offsets with _u8_index, and the byte range
+    // between them is copied out through substr.
+    String u8_slice(int start, int end) const{
+        const int first = _u8_index(start);
+        const int last = _u8_index(end);
+        return substr(first, last - first);
+    }
+};
 
 }   // namespace pkpy