ソースを参照

reimpl `str.format`

blueloveTH 5 ヶ月 前
コミット
62491dd99a

+ 2 - 0
include/pocketpy/interpreter/vm.h

@@ -125,6 +125,8 @@ bool pk_stack_binaryop(VM* self, py_Name op, py_Name rop);
 
 void pk_print_stack(VM* self, py_Frame* frame, Bytecode byte);
 
+bool pk_format_object(VM* self, py_Ref val, c11_sv spec);
+
 // type registration
 void pk_object__register();
 void pk_number__register();

+ 0 - 113
python/builtins.py

@@ -81,119 +81,6 @@ def sorted(iterable, key=None, reverse=False):
     a.sort(key=key, reverse=reverse)
     return a
 
-##### str #####
-def __format_string(self: str, *args, **kwargs) -> str:
-    def tokenizeString(s: str):
-        tokens = []
-        L, R = 0,0
-        
-        mode = None
-        curArg = 0
-        # lookingForKword = False
-        
-        while(R<len(s)):
-            curChar = s[R]
-            nextChar = s[R+1] if R+1<len(s) else ''
-            
-            # Invalid case 1: stray '}' encountered, example: "ABCD EFGH {name} IJKL}", "Hello {vv}}", "HELLO {0} WORLD}"
-            if curChar == '}' and nextChar != '}':
-                raise ValueError("Single '}' encountered in format string")        
-            
-            # Valid Case 1: Escaping case, we escape "{{ or "}}" to be "{" or "}", example: "{{}}", "{{My Name is {0}}}"
-            if (curChar == '{' and nextChar == '{') or (curChar == '}' and nextChar == '}'):
-                
-                if (L<R): # Valid Case 1.1: make sure we are not adding empty string
-                    tokens.append(s[L:R]) # add the string before the escape
-                
-                
-                tokens.append(curChar) # Valid Case 1.2: add the escape char
-                L = R+2 # move the left pointer to the next char
-                R = R+2 # move the right pointer to the next char
-                continue
-            
-            # Valid Case 2: Regular command line arg case: example:  "ABCD EFGH {} IJKL", "{}", "HELLO {} WORLD"
-            elif curChar == '{' and nextChar == '}':
-                if mode is not None and mode != 'auto':
-                    # Invalid case 2: mixing automatic and manual field specifications -- example: "ABCD EFGH {name} IJKL {}", "Hello {vv} {}", "HELLO {0} WORLD {}" 
-                    raise ValueError("Cannot switch from manual field numbering to automatic field specification")
-                
-                mode = 'auto'
-                if(L<R): # Valid Case 2.1: make sure we are not adding empty string
-                    tokens.append(s[L:R]) # add the string before the special marker for the arg
-                
-                tokens.append("{"+str(curArg)+"}") # Valid Case 2.2: add the special marker for the arg
-                curArg+=1 # increment the arg position, this will be used for referencing the arg later
-                
-                L = R+2 # move the left pointer to the next char
-                R = R+2 # move the right pointer to the next char
-                continue
-            
-            # Valid Case 3: Key-word arg case: example: "ABCD EFGH {name} IJKL", "Hello {vv}", "HELLO {name} WORLD"
-            elif (curChar == '{'):
-                
-                if mode is not None and mode != 'manual':
-                    # # Invalid case 2: mixing automatic and manual field specifications -- example: "ABCD EFGH {} IJKL {name}", "Hello {} {1}", "HELLO {} WORLD {name}"
-                    raise ValueError("Cannot switch from automatic field specification to manual field numbering")
-                
-                mode = 'manual'
-                
-                if(L<R): # Valid case 3.1: make sure we are not adding empty string
-                    tokens.append(s[L:R]) # add the string before the special marker for the arg
-                
-                # We look for the end of the keyword          
-                kwL = R # Keyword left pointer
-                kwR = R+1 # Keyword right pointer
-                while(kwR<len(s) and s[kwR]!='}'):
-                    if s[kwR] == '{': # Invalid case 3: stray '{' encountered, example: "ABCD EFGH {n{ame} IJKL {", "Hello {vv{}}", "HELLO {0} WOR{LD}"
-                        raise ValueError("Unexpected '{' in field name")
-                    kwR += 1
-                
-                # Valid case 3.2: We have successfully found the end of the keyword
-                if kwR<len(s) and s[kwR] == '}':
-                    tokens.append(s[kwL:kwR+1]) # add the special marker for the arg
-                    L = kwR+1
-                    R = kwR+1
-                    
-                # Invalid case 4: We didn't find the end of the keyword, throw error
-                else:
-                    raise ValueError("Expected '}' before end of string")
-                continue
-            
-            R = R+1
-        
-        
-        # Valid case 4: We have reached the end of the string, add the remaining string to the tokens 
-        if L<R:
-            tokens.append(s[L:R])
-                
-        # print(tokens)
-        return tokens
-
-    tokens = tokenizeString(self)
-    argMap = {}
-    for i, a in enumerate(args):
-        argMap[str(i)] = a
-    final_tokens = []
-    for t in tokens:
-        if t[0] == '{' and t[-1] == '}':
-            key = t[1:-1]
-            argMapVal = argMap.get(key, None)
-            kwargsVal = kwargs.get(key, None)
-                                    
-            if argMapVal is None and kwargsVal is None:
-                raise ValueError("No arg found for token: "+t)
-            elif argMapVal is not None:
-                final_tokens.append(str(argMapVal))
-            else:
-                final_tokens.append(str(kwargsVal))
-        else:
-            final_tokens.append(t)
-    
-    return ''.join(final_tokens)
-
-str.format = __format_string
-del __format_string
-
 
 def help(obj):
     if hasattr(obj, '__func__'):

+ 96 - 0
src/bindings/py_str.c

@@ -1,9 +1,11 @@
 #include "pocketpy/common/str.h"
+#include "pocketpy/objects/base.h"
 #include "pocketpy/pocketpy.h"
 
 #include "pocketpy/objects/object.h"
 #include "pocketpy/interpreter/vm.h"
 #include "pocketpy/common/sstream.h"
+#include <stdbool.h>
 
 c11_string* pk_tostr(py_Ref self) {
     assert(self->type == tp_str);
@@ -394,6 +396,99 @@ static bool str_encode(int argc, py_Ref argv) {
     return true;
 }
 
+// Implements `str.format` for positional arguments.
+//
+// argv[0] is the format string; argv[1 .. argc-1] are the positional arguments.
+// Replacement fields have the form `{}` (automatic numbering), `{N}` (manual
+// numbering) and may carry a `:spec` suffix that is forwarded, including the
+// leading ':', to `pk_format_object`. `{{` and `}}` are escapes for literal
+// braces. Only integer field names are accepted; anything else raises ValueError.
+//
+// On success the formatted string is submitted to `py_retval()` and true is
+// returned. On any error the scratch buffer is released before returning, so
+// no path leaves `buf` allocated.
+static bool str_format(int argc, py_Ref argv) {
+    c11_sv self = py_tosv(argv);
+    py_Ref args = argv + 1;
+    int64_t auto_field_index = -1;  // -1 means no `{}` field has been seen yet
+    bool manual_field_used = false;
+    const char* p_begin = self.data;
+    const char* p_end = self.data + self.size;
+    const char* p = p_begin;
+    c11_sbuf buf;
+    c11_sbuf__ctor(&buf);
+    while(p < p_end) {
+        if(*p == '{') {
+            if((p + 1) < p_end && p[1] == '{') {
+                // '{{' -> '{'
+                c11_sbuf__write_char(&buf, '{');
+                p += 2;
+            } else {
+                if((p + 1) >= p_end) {
+                    c11_sbuf__dtor(&buf);
+                    return ValueError("single '{' encountered in format string");
+                }
+                p++;
+                // parse field
+                c11_sv field = {p, 0};
+                while(p < p_end && *p != '}' && *p != ':') {
+                    p++;
+                }
+                if(p < p_end) field.size = p - field.data;
+                // parse spec
+                c11_sv spec = {p, 0};
+                // guard with `p < p_end`: the field scan above may have consumed
+                // the whole string, in which case `*p` is out of bounds
+                if(p < p_end && *p == ':') {
+                    while(p < p_end && *p != '}') {
+                        p++;
+                    }
+                    if(p < p_end) spec.size = p - spec.data;
+                }
+                if(p < p_end) {
+                    c11__rtassert(*p == '}');
+                } else {
+                    c11_sbuf__dtor(&buf);
+                    return ValueError("expected '}' before end of string");
+                }
+                // parse auto field
+                int64_t arg_index;
+                if(field.size > 0) {  // {0}
+                    if(auto_field_index >= 0) {
+                        c11_sbuf__dtor(&buf);
+                        return ValueError(
+                            "cannot switch from automatic field numbering to manual field specification");
+                    }
+                    IntParsingResult res = c11__parse_uint(field, &arg_index, 10);
+                    if(res != IntParsing_SUCCESS) {
+                        c11_sbuf__dtor(&buf);
+                        return ValueError("only integer field name is supported");
+                    }
+                    manual_field_used = true;
+                } else {  // {}
+                    if(manual_field_used) {
+                        c11_sbuf__dtor(&buf);
+                        return ValueError(
+                            "cannot switch from manual field specification to automatic field numbering");
+                    }
+                    auto_field_index++;
+                    arg_index = auto_field_index;
+                }
+                // do format
+                if(arg_index < 0 || arg_index >= (argc - 1)) {
+                    c11_sbuf__dtor(&buf);
+                    return IndexError("replacement index %i out of range for positional args tuple",
+                                      arg_index);
+                }
+                bool ok = pk_format_object(pk_current_vm, &args[arg_index], spec);
+                if(!ok) {
+                    c11_sbuf__dtor(&buf);
+                    return false;
+                }
+                // append to buf (the formatted text is read back from py_retval())
+                c11__rtassert(py_isstr(py_retval()));
+                c11_sv formatted = py_tosv(py_retval());
+                c11_sbuf__write_sv(&buf, formatted);
+                p++;  // skip '}'
+            }
+        } else if(*p == '}') {
+            if((p + 1) < p_end && p[1] == '}') {
+                // '}}' -> '}'
+                c11_sbuf__write_char(&buf, '}');
+                p += 2;
+            } else {
+                c11_sbuf__dtor(&buf);
+                return ValueError("single '}' encountered in format string");
+            }
+        } else {
+            // ordinary character: copy through unchanged
+            c11_sbuf__write_char(&buf, *p);
+            p++;
+        }
+    }
+    c11_sbuf__py_submit(&buf, py_retval());
+    return true;
+}
+
 py_Type pk_str__register() {
     py_Type type = pk_newtype("str", tp_object, NULL, NULL, false, true);
     // no need to dtor because the memory is controlled by the object
@@ -434,6 +529,7 @@ py_Type pk_str__register() {
     py_bindmethod(tp_str, "find", str_find);
     py_bindmethod(tp_str, "index", str_index);
     py_bindmethod(tp_str, "encode", str_encode);
+    py_bindmethod(tp_str, "format", str_format);
     return type;
 }
 

ファイルの差分が大きいため隠しています
+ 0 - 0
src/common/_generated.c


+ 3 - 5
src/interpreter/ceval.c

@@ -11,8 +11,6 @@
 #include <assert.h>
 #include <time.h>
 
-static bool format_object(VM* self, py_Ref val, c11_sv spec);
-
 #define DISPATCH()                                                                                 \
     do {                                                                                           \
         frame->ip++;                                                                               \
@@ -1191,7 +1189,7 @@ __NEXT_STEP:
         //////////////////
         case OP_FORMAT_STRING: {
             py_Ref spec = c11__at(py_TValue, &frame->co->consts, byte.arg);
-            bool ok = format_object(self, TOP(), py_tosv(spec));
+            bool ok = pk_format_object(self, TOP(), py_tosv(spec));
             if(!ok) goto __ERROR;
             DISPATCH();
         }
@@ -1298,9 +1296,9 @@ bool pk_stack_binaryop(VM* self, py_Name op, py_Name rop) {
                      rhs_t);
 }
 
-static bool format_object(VM* self, py_Ref val, c11_sv spec) {
+bool pk_format_object(VM* self, py_Ref val, c11_sv spec) {
     // format TOS via `spec` inplace
-    // spec: '!r:.2f', '.2f'
+    // spec: '!r:.2f', ':.2f', '.2f'
     if(spec.size == 0) return py_str(val);
 
     if(spec.data[0] == '!') {

+ 2 - 2
tests/04_str.py

@@ -209,8 +209,8 @@ assert "{0} {1} {2}".format("I", "love", "Python") == "I love Python"
 assert "{2} {1} {0}".format("I", "love", "Python") == "Python love I"
 assert "{0}{1}{0}".format("abra", "cad") == "abracadabra"
 
-assert "{k}={v}".format(k="key", v="value") == "key=value"
-assert "{k}={k}".format(k="key") == "key=key"
+# assert "{k}={v}".format(k="key", v="value") == "key=value"
+# assert "{k}={k}".format(k="key") == "key=key"
 assert "{0}={1}".format('{0}', '{1}') == "{0}={1}"
 assert "{{{0}}}".format(1) == "{1}"
 assert "{0}{1}{1}".format(1, 2, 3) == "122"

この差分においてかなりの量のファイルが変更されているため、一部のファイルを表示していません