blueloveTH 1 rok temu
rodzic
commit
1a9bad4e54
4 zmienionych plików z 107 dodań i 51 usunięć
  1. 4 4
      include/pocketpy/common/str.h
  2. 23 17
      src/common/str.c
  3. 76 26
      src/public/py_str.c
  4. 4 4
      tests/04_str.py

+ 4 - 4
include/pocketpy/common/str.h

@@ -43,14 +43,14 @@ c11_string* c11_string__copy(c11_string* self);
 void c11_string__delete(c11_string* self);
 c11_sv c11_string__sv(c11_string* self);
 
-int c11_string__u8_length(c11_string* self);
-c11_sv c11_string__u8_getitem(c11_string* self, int i);
-c11_string* c11_string__u8_slice(c11_string* self, int start, int stop, int step);
+int c11_sv__u8_length(c11_sv self);
+c11_sv c11_sv__u8_getitem(c11_sv self, int i);
+c11_string* c11_sv__u8_slice(c11_sv self, int start, int stop, int step);
 
 // general string operations
 c11_sv c11_sv__slice(c11_sv sv, int start);
 c11_sv c11_sv__slice2(c11_sv sv, int start, int stop);
-c11_sv c11_sv__strip(c11_sv sv, bool left, bool right);
+c11_sv c11_sv__strip(c11_sv sv, c11_sv chars, bool left, bool right);
 int c11_sv__index(c11_sv self, char c);
 int c11_sv__index2(c11_sv self, c11_sv sub, int start);
 int c11_sv__count(c11_sv self, c11_sv sub);

+ 23 - 17
src/common/str.c

@@ -47,7 +47,7 @@ c11_string* c11_sv__replace(c11_sv self, char old, char new_) {
     return retval;
 }
 
-c11_string* c11_sv__replace2(c11_sv self, c11_sv old, c11_sv new_){
+c11_string* c11_sv__replace2(c11_sv self, c11_sv old, c11_sv new_) {
     c11_sbuf buf;
     c11_sbuf__ctor(&buf);
     int start = 0;
@@ -64,22 +64,20 @@ c11_string* c11_sv__replace2(c11_sv self, c11_sv old, c11_sv new_){
     return c11_sbuf__submit(&buf);
 }
 
-int c11_string__u8_length(c11_string* self) {
-    return c11__byte_index_to_unicode(self->data, self->size);
-}
+int c11_sv__u8_length(c11_sv sv) { return c11__byte_index_to_unicode(sv.data, sv.size); }
 
-c11_sv c11_string__u8_getitem(c11_string* self, int i) {
-    i = c11__unicode_index_to_byte(self->data, i);
-    int size = c11__u8_header(self->data[i], false);
-    return c11_sv__slice2(c11_string__sv(self), i, i + size);
+c11_sv c11_sv__u8_getitem(c11_sv sv, int i) {
+    i = c11__unicode_index_to_byte(sv.data, i);
+    int size = c11__u8_header(sv.data[i], false);
+    return c11_sv__slice2(sv, i, i + size);
 }
 
-c11_string* c11_string__u8_slice(c11_string* self, int start, int stop, int step) {
+c11_string* c11_sv__u8_slice(c11_sv sv, int start, int stop, int step) {
     c11_sbuf ss;
     c11_sbuf__ctor(&ss);
     assert(step != 0);
     for(int i = start; step > 0 ? i < stop : i > stop; i += step) {
-        c11_sv unicode = c11_string__u8_getitem(self, i);
+        c11_sv unicode = c11_sv__u8_getitem(sv, i);
         c11_sbuf__write_sv(&ss, unicode);
     }
     return c11_sbuf__submit(&ss);
@@ -95,20 +93,28 @@ c11_sv c11_sv__slice2(c11_sv sv, int start, int stop) {
     return (c11_sv){sv.data + start, stop - start};
 }
 
-c11_sv c11_sv__strip(c11_sv sv, bool left, bool right) {
+c11_sv c11_sv__strip(c11_sv sv, c11_sv chars, bool left, bool right) {
     int L = 0;
-    int R = sv.size;
-    const char* data = sv.data;
+    int R = c11_sv__u8_length(sv);
     if(left) {
-        while(L < R && (data[L] == ' ' || data[L] == '\t' || data[L] == '\n' || data[L] == '\r'))
+        while(L < R) {
+            c11_sv tmp = c11_sv__u8_getitem(sv, L);
+            bool found = c11_sv__index2(chars, tmp, 0) != -1;
+            if(!found) break;
             L++;
+        }
     }
     if(right) {
-        while(L < R && (data[R - 1] == ' ' || data[R - 1] == '\t' || data[R - 1] == '\n' ||
-                        data[R - 1] == '\r'))
+        while(L < R) {
+            c11_sv tmp = c11_sv__u8_getitem(sv, R - 1);
+            bool found = c11_sv__index2(chars, tmp, 0) != -1;
+            if(!found) break;
             R--;
+        }
     }
-    return c11_sv__slice2(sv, L, R);
+    int start = c11__unicode_index_to_byte(sv.data, L);
+    int stop = c11__unicode_index_to_byte(sv.data, R);
+    return c11_sv__slice2(sv, start, stop);
 }
 
 int c11_sv__index(c11_sv self, char c) {

+ 76 - 26
src/public/py_str.c

@@ -164,18 +164,18 @@ static bool _py_str__iter__(int argc, py_Ref argv) {
 
 static bool _py_str__getitem__(int argc, py_Ref argv) {
     PY_CHECK_ARGC(2);
-    c11_string* self = py_touserdata(&argv[0]);
+    c11_sv self = c11_string__sv(py_touserdata(&argv[0]));
     py_Ref _1 = py_arg(1);
     if(_1->type == tp_int) {
         int index = py_toint(py_arg(1));
-        pk__normalize_index(&index, self->size);
-        c11_sv res = c11_string__u8_getitem(self, index);
+        pk__normalize_index(&index, self.size);
+        c11_sv res = c11_sv__u8_getitem(self, index);
         py_newstrn(py_retval(), res.data, res.size);
     } else if(_1->type == tp_slice) {
         int start, stop, step;
-        bool ok = pk__parse_int_slice(_1, c11_string__u8_length(self), &start, &stop, &step);
+        bool ok = pk__parse_int_slice(_1, c11_sv__u8_length(self), &start, &stop, &step);
         if(!ok) return false;
-        c11_string* res = c11_string__u8_slice(self, start, stop, step);
+        c11_string* res = c11_sv__u8_slice(self, start, stop, step);
         py_newstrn(py_retval(), res->data, res->size);
         c11_string__delete(res);
         return true;
@@ -261,14 +261,37 @@ static bool _py_str__endswith(int argc, py_Ref argv) {
 }
 
 static bool _py_str__join(int argc, py_Ref argv) {
-    assert(false);
-    // PY_CHECK_ARGC(2);
-    // c11_sbuf buf;
-    // c11_sbuf__ctor(&buf);
-    // c11_string* sep = py_touserdata(&argv[0]);
-    // py_Ref iter = py_pushtmp();
-    // py_iter(iter, &argv[1]);
-    return false;
+    PY_CHECK_ARGC(2);
+    c11_sv self = c11_string__sv(py_touserdata(&argv[0]));
+    py_Ref _1 = py_arg(1);
+    // join a list or tuple
+    py_TValue* p;
+    int length;
+    if(py_istype(_1, tp_list)) {
+        p = py_list__getitem(_1, 0);
+        length = py_list__len(_1);
+    } else if(py_istype(_1, tp_tuple)) {
+        p = py_tuple__getitem(_1, 0);
+        length = py_tuple__len(_1);
+    } else {
+        return TypeError("join() argument must be a list or tuple");
+    }
+
+    c11_sbuf buf;
+    c11_sbuf__ctor(&buf);
+    for(int i = 0; i < length; i++) {
+        if(i > 0) c11_sbuf__write_sv(&buf, self);
+        if(!py_checkstr(&p[i])) {
+            c11_sbuf__dtor(&buf);
+            return false;
+        }
+        c11_string* item = py_touserdata(&p[i]);
+        c11_sbuf__write_cstrn(&buf, item->data, item->size);
+    }
+    c11_string* res = c11_sbuf__submit(&buf);
+    py_newstrn(py_retval(), res->data, res->size);
+    c11_string__delete(res);
+    return true;
 }
 
 static bool _py_str__replace(int argc, py_Ref argv) {
@@ -318,27 +341,53 @@ static bool _py_str__count(int argc, py_Ref argv) {
     return true;
 }
 
-static bool _py_str__strip(int argc, py_Ref argv) {
-    PY_CHECK_ARGC(1);
-    c11_string* self = py_touserdata(&argv[0]);
-    c11_sv res = c11_sv__strip(c11_string__sv(self), true, true);
+static bool _py_str__strip_impl(bool left, bool right, int argc, py_Ref argv) {
+    c11_sv self = c11_string__sv(py_touserdata(&argv[0]));
+    c11_sv chars;
+    if(argc == 1) {
+        chars = (c11_sv){" \t\n\r", 4};
+    } else if(argc == 2) {
+        if(!py_checkstr(&argv[1])) return false;
+        chars = c11_string__sv(py_touserdata(&argv[1]));
+    } else {
+        return TypeError("strip() takes at most 2 arguments");
+    }
+    c11_sv res = c11_sv__strip(self, chars, left, right);
     py_newstrn(py_retval(), res.data, res.size);
     return true;
 }
 
+static bool _py_str__strip(int argc, py_Ref argv) {
+    return _py_str__strip_impl(true, true, argc, argv);
+}
+
 static bool _py_str__lstrip(int argc, py_Ref argv) {
-    PY_CHECK_ARGC(1);
-    c11_string* self = py_touserdata(&argv[0]);
-    c11_sv res = c11_sv__strip(c11_string__sv(self), true, false);
-    py_newstrn(py_retval(), res.data, res.size);
-    return true;
+    return _py_str__strip_impl(true, false, argc, argv);
 }
 
 static bool _py_str__rstrip(int argc, py_Ref argv) {
-    PY_CHECK_ARGC(1);
-    c11_string* self = py_touserdata(&argv[0]);
-    c11_sv res = c11_sv__strip(c11_string__sv(self), false, true);
-    py_newstrn(py_retval(), res.data, res.size);
+    return _py_str__strip_impl(false, true, argc, argv);
+}
+
+static bool _py_str__zfill(int argc, py_Ref argv) {
+    PY_CHECK_ARGC(2);
+    c11_sv self = c11_string__sv(py_touserdata(&argv[0]));
+    PY_CHECK_ARG_TYPE(1, tp_int);
+    int width = py_toint(py_arg(1));
+    int delta = width - c11_sv__u8_length(self);
+    if(delta <= 0) {
+        *py_retval() = argv[0];
+        return true;
+    }
+    c11_sbuf buf;
+    c11_sbuf__ctor(&buf);
+    for(int i = 0; i < delta; i++) {
+        c11_sbuf__write_char(&buf, '0');
+    }
+    c11_sbuf__write_sv(&buf, self);
+    c11_string* res = c11_sbuf__submit(&buf);
+    py_newstrn(py_retval(), res->data, res->size);
+    c11_string__delete(res);
     return true;
 }
 
@@ -377,6 +426,7 @@ py_Type pk_str__register() {
     py_bindmethod(tp_str, "strip", _py_str__strip);
     py_bindmethod(tp_str, "lstrip", _py_str__lstrip);
     py_bindmethod(tp_str, "rstrip", _py_str__rstrip);
+    py_bindmethod(tp_str, "zfill", _py_str__zfill);
     return type;
 }
 

+ 4 - 4
tests/04_str.py

@@ -101,10 +101,6 @@ assert s2.join( seq ) == "runoob"
 assert 'x'.zfill(5) == '0000x'
 assert '568'.zfill(1) == '568'
 
-def test(*seq):
-    return s1.join(seq)
-assert test("r", "u", "n", "o", "o", "b") == "r-u-n-o-o-b"
-
 num = 6
 assert str(num) == '6'
 
@@ -178,6 +174,10 @@ assert list(a) == ['b']
 a = '测'
 assert list(a) == ['测']
 
+def test(*seq):
+    return s1.join(seq)
+assert test("r", "u", "n", "o", "o", "b") == "r-u-n-o-o-b"
+
 # test format()
 assert "Hello, {}!".format("World") == "Hello, World!"
 assert "{} {} {}".format("I", "love", "Python") == "I love Python"