浏览代码

fix https://github.com/blueloveTH/pocketpy/issues/59

blueloveTH 2 年之前
父节点
当前提交
721b7a2959
共有 10 个文件被更改，包括 139 次插入和 75 次删除
  1. 11 11
      python/builtins.py
  2. 25 34
      src/ceval.h
  3. 1 1
      src/compiler.h
  4. 6 0
      src/gc.h
  5. 2 2
      src/lexer.h
  6. 4 12
      src/obj.h
  7. 22 9
      src/pocketpy.h
  8. 8 5
      src/str.h
  9. 43 0
      src/vm.h
  10. 17 1
      tests/04_str.py

+ 11 - 11
python/builtins.py

@@ -71,7 +71,7 @@ def sorted(iterable, reverse=False):
 
 str.__mul__ = lambda self, n: ''.join([self for _ in range(n)])
 
-def str::split(self, sep):
+def str@split(self, sep):
     if sep == "":
         return list(self)
     res = []
@@ -86,7 +86,7 @@ def str::split(self, sep):
     res.append(self)
     return res
 
-def str::format(self, *args):
+def str@format(self, *args):
     if '{}' in self:
         for i in range(len(args)):
             self = self.replace('{}', str(args[i]), 1)
@@ -95,7 +95,7 @@ def str::format(self, *args):
             self = self.replace('{'+str(i)+'}', str(args[i]))
     return self
 
-def str::strip(self, chars=None):
+def str@strip(self, chars=None):
     chars = chars or ' \t\n\r'
     i = 0
     while i < len(self) and self[i] in chars:
@@ -127,30 +127,30 @@ def __qsort(a: list, L: int, R: int):
     __qsort(a, L, j)
     __qsort(a, i, R)
 
-def list::sort(self, reverse=False):
+def list@sort(self, reverse=False):
     __qsort(self, 0, len(self)-1)
     if reverse:
         self.reverse()
 
-def list::remove(self, value):
+def list@remove(self, value):
     for i in range(len(self)):
         if self[i] == value:
             del self[i]
             return True
     return False
 
-def list::index(self, value):
+def list@index(self, value):
     for i in range(len(self)):
         if self[i] == value:
             return i
     return -1
 
-def list::pop(self, i=-1):
+def list@pop(self, i=-1):
     res = self[i]
     del self[i]
     return res
 
-def list::__eq__(self, other):
+def list@__eq__(self, other):
     if type(self) is not type(other):
         return False
     if len(self) != len(other):
@@ -163,7 +163,7 @@ tuple.__eq__ = list.__eq__
 list.__ne__ = lambda self, other: not self.__eq__(other)
 tuple.__ne__ = lambda self, other: not self.__eq__(other)
 
-def list::count(self, x):
+def list@count(self, x):
     res = 0
     for i in self:
         if i == x:
@@ -171,7 +171,7 @@ def list::count(self, x):
     return res
 tuple.count = list.count
 
-def list::__contains__(self, item):
+def list@__contains__(self, item):
     for i in self:
         if i == item:
             return True
@@ -202,5 +202,5 @@ class staticmethod:
     def __call__(self, *args):
         return self.f(*args)
     
-def type::__repr__(self):
+def type@__repr__(self):
     return "<class '" + self.__name__ + "'>"

+ 25 - 34
src/ceval.h

@@ -227,16 +227,12 @@ __NEXT_STEP:;
         STACK_SHRINK(byte.arg);
         PUSH(obj);
     } DISPATCH();
-    TARGET(BUILD_SLICE) {
-        _2 = POPX();
-        _1 = POPX();
-        _0 = POPX();
-        Slice s;
-        if(_0 != None) s.start = CAST(int, _0);
-        if(_1 != None) s.stop = CAST(int, _1);
-        if(_2 != None) s.step = CAST(int, _2);
-        PUSH(VAR(s));
-    } DISPATCH();
+    TARGET(BUILD_SLICE)
+        _2 = POPX();    // step
+        _1 = POPX();    // stop
+        _0 = POPX();    // start
+        PUSH(VAR(Slice(_0, _1, _2)));
+        DISPATCH();
     TARGET(BUILD_TUPLE)
         _0 = VAR(STACK_VIEW(byte.arg).to_tuple());
         STACK_SHRINK(byte.arg);
@@ -352,14 +348,11 @@ __NEXT_STEP:;
         if(asBool(TOP()) == false) frame->jump_abs(byte.arg);
         else POP();
         DISPATCH();
-    TARGET(LOOP_CONTINUE) {
-        int target = co_blocks[byte.block].start;
-        frame->jump_abs(target);
-    } DISPATCH();
+    TARGET(LOOP_CONTINUE)
+        frame->jump_abs(co_blocks[byte.block].start);
+        DISPATCH();
     TARGET(LOOP_BREAK)
-        frame->jump_abs_break(
-            co_blocks[byte.block].end
-        );
+        frame->jump_abs_break(co_blocks[byte.block].end);
         DISPATCH();
     TARGET(GOTO) {
         StrName name(byte.arg);
@@ -393,11 +386,10 @@ __NEXT_STEP:;
     TARGET(YIELD_VALUE)
         return PY_OP_YIELD;
     /*****************************************/
-    TARGET(LIST_APPEND) {
-        PyObject* obj = POPX();
-        List& list = CAST(List&, SECOND());
-        list.push_back(obj);
-    } DISPATCH();
+    TARGET(LIST_APPEND)
+        _0 = POPX();
+        CAST(List&, SECOND()).push_back(_0);
+        DISPATCH();
     TARGET(DICT_ADD) {
         _0 = POPX();
         Tuple& t = CAST(Tuple&, _0);
@@ -509,16 +501,15 @@ __NEXT_STEP:;
         PyObject* cls = new_type_object(frame->_module, name, OBJ_GET(Type, super_cls));
         PUSH(cls);
     } DISPATCH();
-    TARGET(END_CLASS) {
-        PyObject* cls = POPX();
-        cls->attr()._try_perfect_rehash();
-    }; DISPATCH();
-    TARGET(STORE_CLASS_ATTR) {
-        StrName name(byte.arg);
-        PyObject* obj = POPX();
-        PyObject* cls = TOP();
-        cls->attr().set(name, obj);
-    } DISPATCH();
+    TARGET(END_CLASS)
+        _0 = POPX();
+        _0->attr()._try_perfect_rehash();
+        DISPATCH();
+    TARGET(STORE_CLASS_ATTR)
+        _name = StrName(byte.arg);
+        _0 = POPX();
+        TOP()->attr().set(_name, _0);
+        DISPATCH();
     /*****************************************/
     // // TODO: using "goto" inside with block may cause __exit__ not called
     TARGET(WITH_ENTER)
@@ -543,8 +534,8 @@ __NEXT_STEP:;
     } DISPATCH();
     TARGET(EXCEPTION_MATCH) {
         const auto& e = CAST(Exception&, TOP());
-        StrName name(byte.arg);
-        PUSH(VAR(e.match_type(name)));
+        _name = StrName(byte.arg);
+        PUSH(VAR(e.match_type(_name)));
     } DISPATCH();
     TARGET(RAISE) {
         PyObject* obj = POPX();

+ 1 - 1
src/compiler.h

@@ -906,7 +906,7 @@ __SUBSCR_END:
         Str decl_name;
         consume(TK("@id"));
         decl_name = prev().str();
-        if(!ctx()->is_compiling_class && match(TK("::"))){
+        if(!ctx()->is_compiling_class && match(TK("@"))){
             consume(TK("@id"));
             obj_name = decl_name;
             decl_name = prev().str();

+ 6 - 0
src/gc.h

@@ -141,6 +141,12 @@ template<> inline void gc_mark<BoundMethod>(BoundMethod& t){
     OBJ_MARK(t.func);
 }
 
+template<> inline void gc_mark<Slice>(Slice& t){  // gc_mark specialization for Slice: applies OBJ_MARK to each of its three members
+    OBJ_MARK(t.start);
+    OBJ_MARK(t.stop);
+    OBJ_MARK(t.step);
+}
+
 template<> inline void gc_mark<Function>(Function& t){
     t.decl->_gc_mark();
     if(t._module != nullptr) OBJ_MARK(t._module);

+ 2 - 2
src/lexer.h

@@ -19,7 +19,7 @@ constexpr const char* kTokens[] = {
     "&", "&=", "|", "|=", "^", "^=", 
     "<<", "<<=", ">>", ">>=",
     /*****************************************/
-    ".", ",", ":", ";", "#", "(", ")", "[", "]", "{", "}", "::",
+    ".", ",", ":", ";", "#", "(", ")", "[", "]", "{", "}",
     "**", "=", ">", "<", "...", "->", "?", "@", "==", "!=", ">=", "<=",
     /** KW_BEGIN **/
     "class", "import", "as", "def", "lambda", "pass", "del", "from", "with", "yield",
@@ -368,7 +368,7 @@ struct Lexer {
                 case '{': add_token(TK("{")); return true;
                 case '}': add_token(TK("}")); return true;
                 case ',': add_token(TK(",")); return true;
-                case ':': add_token_2(':', TK(":"), TK("::")); return true;
+                case ':': add_token(TK(":")); return true;
                 case ';': add_token(TK(";")); return true;
                 case '(': add_token(TK("(")); return true;
                 case ')': add_token(TK(")")); return true;

+ 4 - 12
src/obj.h

@@ -81,19 +81,11 @@ struct Bytes{
 
 using Super = std::pair<PyObject*, Type>;
 
-// TODO: re-examine the design of Slice
 struct Slice {
-    int start = 0;
-    int stop = 0x7fffffff;
-    int step = 1;
-
-    void normalize(int len){
-        if(start < 0) start += len;
-        if(stop < 0) stop += len;
-        if(start < 0) start = 0;
-        if(stop > len) stop = len;
-        if(stop < start) stop = start;
-    }
+    PyObject* start;
+    PyObject* stop;
+    PyObject* step;
+    Slice(PyObject* start, PyObject* stop, PyObject* step) : start(start), stop(stop), step(step) {}
 };
 
 class BaseIter {

+ 22 - 9
src/pocketpy.h

@@ -368,9 +368,10 @@ inline void init_builtins(VM* _vm) {
         const Str& self (CAST(Str&, args[0]));
 
         if(is_type(args[1], vm->tp_slice)){
-            Slice s = _CAST(Slice, args[1]);
-            s.normalize(self.u8_length());
-            return VAR(self.u8_slice(s.start, s.stop));
+            const Slice& s = _CAST(Slice&, args[1]);
+            int start, stop, step;
+            vm->parse_int_slice(s, self.u8_length(), start, stop, step);
+            return VAR(self.u8_slice(start, stop, step));
         }
 
         int index = CAST(int, args[1]);
@@ -509,10 +510,11 @@ inline void init_builtins(VM* _vm) {
         const List& self = CAST(List&, args[0]);
 
         if(is_type(args[1], vm->tp_slice)){
-            Slice s = _CAST(Slice, args[1]);
-            s.normalize(self.size());
+            const Slice& s = _CAST(Slice&, args[1]);
+            int start, stop, step;
+            vm->parse_int_slice(s, self.size(), start, stop, step);
             List new_list;
-            for(size_t i = s.start; i < s.stop; i++) new_list.push_back(self[i]);
+            for(int i=start; step>0?i<stop:i>stop; i+=step) new_list.push_back(self[i]);
             return VAR(std::move(new_list));
         }
 
@@ -551,10 +553,11 @@ inline void init_builtins(VM* _vm) {
         const Tuple& self = CAST(Tuple&, args[0]);
 
         if(is_type(args[1], vm->tp_slice)){
-            Slice s = _CAST(Slice, args[1]);
-            s.normalize(self.size());
+            const Slice& s = _CAST(Slice&, args[1]);
+            int start, stop, step;
+            vm->parse_int_slice(s, self.size(), start, stop, step);
             List new_list;
-            for(size_t i = s.start; i < s.stop; i++) new_list.push_back(self[i]);
+            for(int i=start; step>0?i<stop:i>stop; i+=step) new_list.push_back(self[i]);
             return VAR(Tuple(std::move(new_list)));
         }
 
@@ -877,6 +880,16 @@ inline void VM::post_init(){
     _t(tp_bound_method)->attr().set("__func__", property([](VM* vm, ArgsView args){
         return CAST(BoundMethod&, args[0]).func;
     }));
+
+    _t(tp_slice)->attr().set("start", property([](VM* vm, ArgsView args){
+        return CAST(Slice&, args[0]).start;
+    }));
+    _t(tp_slice)->attr().set("stop", property([](VM* vm, ArgsView args){
+        return CAST(Slice&, args[0]).stop;
+    }));
+    _t(tp_slice)->attr().set("step", property([](VM* vm, ArgsView args){
+        return CAST(Slice&, args[0]).step;
+    }));
 #endif
 }
 

+ 8 - 5
src/str.h

@@ -265,11 +265,14 @@ struct Str{
         return substr(i, utf8len(data[i]));
     }
 
-    Str u8_slice(int start, int end) const{
-        // TODO: optimize this
-        start = _unicode_index_to_byte(start);
-        end = _unicode_index_to_byte(end);
-        return substr(start, end - start);
+    Str u8_slice(int start, int stop, int step) const{
+        std::stringstream ss;
+        if(is_ascii){
+            for(int i=start; step>0?i<stop:i>stop; i+=step) ss << data[i];
+        }else{
+            for(int i=start; step>0?i<stop:i>stop; i+=step) ss << u8_getitem(i);
+        }
+        return ss.str();
     }
 
     int u8_length() const {

+ 43 - 0
src/vm.h

@@ -381,6 +381,7 @@ public:
     PyObject* _py_call(PyObject** sp_base, PyObject* callable, ArgsView args, ArgsView kwargs);
     PyObject* getattr(PyObject* obj, StrName name, bool throw_err=true);
     PyObject* get_unbound_method(PyObject* obj, StrName name, PyObject** self, bool throw_err=true, bool fallback=false);
+    void parse_int_slice(const Slice& s, int length, int& start, int& stop, int& step);
     void setattr(PyObject* obj, StrName name, PyObject* value);
     template<int ARGC>
     void bind_method(PyObject*, Str, NativeFuncC);
@@ -557,6 +558,48 @@ inline bool VM::asBool(PyObject* obj){
     return true;
 }
 
+inline void VM::parse_int_slice(const Slice& s, int length, int& start, int& stop, int& step){  // Resolve slice `s` against a sequence of `length` items; results are written to the start/stop/step out-parameters.
+    auto clip = [](int value, int min, int max){  // clamp value into [min, max]
+        if(value < min) return min;
+        if(value > max) return max;
+        return value;
+    };
+    if(s.step == None) step = 1;  // a missing step defaults to 1
+    else step = CAST(int, s.step);
+    if(step == 0) ValueError("slice step cannot be zero");  // NOTE(review): the code below assumes ValueError() does not return - confirm
+    if(step > 0){  // forward slice: explicit bounds are clamped to [0, length]
+        if(s.start == None){
+            start = 0;  // a missing start means the first element
+        }else{
+            start = CAST(int, s.start);
+            if(start < 0) start += length;  // a negative index counts from the end
+            start = clip(start, 0, length);
+        }
+        if(s.stop == None){
+            stop = length;  // a missing stop means one past the last element
+        }else{
+            stop = CAST(int, s.stop);
+            if(stop < 0) stop += length;  // a negative index counts from the end
+            stop = clip(stop, 0, length);
+        }
+    }else{  // backward slice (step < 0): explicit bounds are clamped to [-1, length - 1]
+        if(s.start == None){
+            start = length - 1;  // a missing start means the last element
+        }else{
+            start = CAST(int, s.start);
+            if(start < 0) start += length;  // a negative index counts from the end
+            start = clip(start, -1, length - 1);
+        }
+        if(s.stop == None){
+            stop = -1;  // one before the first index, so a descending loop testing `i > stop` still visits index 0
+        }else{
+            stop = CAST(int, s.stop);
+            if(stop < 0) stop += length;  // a negative index counts from the end
+            stop = clip(stop, -1, length - 1);
+        }
+    }
+}
+
 inline i64 VM::hash(PyObject* obj){
     if (is_non_tagged_type(obj, tp_str)) return CAST(Str&, obj).hash();
     if (is_int(obj)) return CAST(i64, obj);

+ 17 - 1
tests/04_str.py

@@ -81,4 +81,20 @@ assert "Hello, {}!".format("World") == "Hello, World!"
 assert "{} {} {}".format("I", "love", "Python") == "I love Python"
 assert "{0} {1} {2}".format("I", "love", "Python") == "I love Python"
 assert "{2} {1} {0}".format("I", "love", "Python") == "Python love I"
-assert "{0}{1}{0}".format("abra", "cad") == "abracadabra"
+assert "{0}{1}{0}".format("abra", "cad") == "abracadabra"
+
+# 3rd slice
+a = "Hello, World!"
+assert a[::-1] == "!dlroW ,olleH"
+assert a[::2] == "Hlo ol!"
+assert a[2:5:2] == "lo"
+assert a[5:2:-1] == ",ol"
+assert a[5:2:-2] == ",l"
+
+b = list(a)
+assert b == ['H', 'e', 'l', 'l', 'o', ',', ' ', 'W', 'o', 'r', 'l', 'd', '!']
+assert b[::-1] == ['!', 'd', 'l', 'r', 'o', 'W', ' ', ',', 'o', 'l', 'l', 'e', 'H']
+assert b[::2] == ['H', 'l', 'o', ' ', 'o', 'l', '!']
+assert b[2:5:2] == ['l', 'o']
+assert b[5:2:-1] == [',', 'o', 'l']
+assert b[5:2:-2] == [',', 'l']