Przeglądaj źródła

fix `str.split`

blueloveTH 1 rok temu
rodzic
commit
104785c94b
3 zmienione pliki: 30 dodań i 18 usunięć
  1. src/common/str.c (+7 -8)
  2. src/public/py_str.c (+8 -4)
  3. tests/04_str.py (+15 -6)

+ 7 - 8
src/common/str.c

@@ -188,15 +188,13 @@ c11_vector /* T=c11_sv */ c11_sv__split(c11_sv self, char sep) {
     int i = 0;
     int i = 0;
     for(int j = 0; j < self.size; j++) {
     for(int j = 0; j < self.size; j++) {
         if(data[j] == sep) {
         if(data[j] == sep) {
-            if(j > i) {
-                c11_sv tmp = {data + i, j - i};
-                c11_vector__push(c11_sv, &retval, tmp);
-            }
+            assert(j >= i);
+            c11_sv tmp = {data + i, j - i};
+            c11_vector__push(c11_sv, &retval, tmp);
             i = j + 1;
             i = j + 1;
-            continue;
         }
         }
     }
     }
-    if(self.size > i) {
+    if(i <= self.size) {
         c11_sv tmp = {data + i, self.size - i};
         c11_sv tmp = {data + i, self.size - i};
         c11_vector__push(c11_sv, &retval, tmp);
         c11_vector__push(c11_sv, &retval, tmp);
     }
     }
@@ -204,6 +202,7 @@ c11_vector /* T=c11_sv */ c11_sv__split(c11_sv self, char sep) {
 }
 }
 
 
 c11_vector /* T=c11_sv */ c11_sv__split2(c11_sv self, c11_sv sep) {
 c11_vector /* T=c11_sv */ c11_sv__split2(c11_sv self, c11_sv sep) {
+    if(sep.size == 1) return c11_sv__split(self, sep.data[0]);
     c11_vector retval;
     c11_vector retval;
     c11_vector__ctor(&retval, sizeof(c11_sv));
     c11_vector__ctor(&retval, sizeof(c11_sv));
     int start = 0;
     int start = 0;
@@ -212,11 +211,11 @@ c11_vector /* T=c11_sv */ c11_sv__split2(c11_sv self, c11_sv sep) {
         int i = c11_sv__index2(self, sep, start);
         int i = c11_sv__index2(self, sep, start);
         if(i == -1) break;
         if(i == -1) break;
         c11_sv tmp = {data + start, i - start};
         c11_sv tmp = {data + start, i - start};
-        if(tmp.size != 0) c11_vector__push(c11_sv, &retval, tmp);
+        c11_vector__push(c11_sv, &retval, tmp);
         start = i + sep.size;
         start = i + sep.size;
     }
     }
     c11_sv tmp = {data + start, self.size - start};
     c11_sv tmp = {data + start, self.size - start};
-    if(tmp.size != 0) c11_vector__push(c11_sv, &retval, tmp);
+    c11_vector__push(c11_sv, &retval, tmp);
     return retval;
     return retval;
 }
 }
 
 

+ 8 - 4
src/public/py_str.c

@@ -317,21 +317,25 @@ static bool str_replace(int argc, py_Ref argv) {
 static bool str_split(int argc, py_Ref argv) {
 static bool str_split(int argc, py_Ref argv) {
     c11_sv self = c11_string__sv(py_touserdata(&argv[0]));
     c11_sv self = c11_string__sv(py_touserdata(&argv[0]));
     c11_vector res;
     c11_vector res;
+    bool discard_empty = false;
     if(argc > 2) return TypeError("split() takes at most 2 arguments");
     if(argc > 2) return TypeError("split() takes at most 2 arguments");
     if(argc == 1) {
     if(argc == 1) {
-        // sep = ' '
+        // sep = None
         res = c11_sv__split(self, ' ');
         res = c11_sv__split(self, ' ');
+        discard_empty = true;
     }
     }
     if(argc == 2) {
     if(argc == 2) {
         // sep = argv[1]
         // sep = argv[1]
         if(!py_checkstr(&argv[1])) return false;
         if(!py_checkstr(&argv[1])) return false;
         c11_sv sep = c11_string__sv(py_touserdata(&argv[1]));
         c11_sv sep = c11_string__sv(py_touserdata(&argv[1]));
+        if(sep.size == 0) return ValueError("empty separator");
         res = c11_sv__split2(self, sep);
         res = c11_sv__split2(self, sep);
     }
     }
-    py_newlistn(py_retval(), res.length);
+    py_newlist(py_retval());
     for(int i = 0; i < res.length; i++) {
     for(int i = 0; i < res.length; i++) {
-        c11_sv item = c11__getitem(c11_sv, &res, i);
-        py_newstrv(py_list_getitem(py_retval(), i), item);
+        c11_sv part = c11__getitem(c11_sv, &res, i);
+        if(discard_empty && part.size == 0) continue;
+        py_newstrv(py_list_emplace(py_retval()), part);
     }
     }
     c11_vector__dtor(&res);
     c11_vector__dtor(&res);
     return true;
     return true;

+ 15 - 6
tests/04_str.py

@@ -9,7 +9,10 @@ assert 'testing5' >= 'test' + 'ing1'
 assert 'abc' + 'def' == 'abcdef'
 assert 'abc' + 'def' == 'abcdef'
 assert 'abc' * 3 == 'abcabcabc'
 assert 'abc' * 3 == 'abcabcabc'
 
 
-assert repr('\\\n\t\'\r\b\x48') == r"'\\\n\t\'\r\bH'"
+assert repr('\\\n\t\'\r\b\x48') in [
+    r"'\\\n\t\'\r\bH'",
+    '"\\\\\\n\\t\'\\r\\x08H"',
+]
 
 
 a = ''
 a = ''
 b = 'test'
 b = 'test'
@@ -46,13 +49,19 @@ assert t.startswith('this') == True;
 
 
 assert t.split('w') == ['this is string example....', 'o', '!!!']
 assert t.split('w') == ['this is string example....', 'o', '!!!']
 assert "a,b,c".split(',') == ['a', 'b', 'c']
 assert "a,b,c".split(',') == ['a', 'b', 'c']
-assert 'a,'.split(',') == ['a']
+assert 'a,'.split(',') == ['a', '']
 assert 'foo!!bar!!baz'.split('!!') == ['foo', 'bar', 'baz']
 assert 'foo!!bar!!baz'.split('!!') == ['foo', 'bar', 'baz']
 assert ' 4 3 '.split() == ['4', '3']
 assert ' 4 3 '.split() == ['4', '3']
-assert '  4 3  '.split(' ') == ['4', '3']
-
-x = 'aa bb cccc'
-assert x.split('cc') == ['aa bb ']
+assert '  4 3  '.split(' ') == ['', '', '4', '3', '', '']
+assert 'aa bb cccc'.split('cc') == ['aa bb ', '', '']
+assert '.a.b.'.split('.') == ['', 'a', 'b', '']
+assert '.a...b.'.split('.') == ['', 'a', '', '', 'b', '']
+
+try:
+    'a'.split('')
+    exit(1)
+except ValueError:
+    pass
 
 
 assert '111'.count('1') == 3
 assert '111'.count('1') == 3
 assert '111'.count('11') == 1
 assert '111'.count('11') == 1