Просмотр исходного кода

fixed the error case for escaped curly braces, and converted the tokenize function into a inner function

S. Mahmudul Hasan 2 лет назад
Родитель
Сommit
151e9467ae
1 измененных файлов с 81 добавлено и 64 удалено
  1. 81 64
      python/builtins.py

+ 81 - 64
python/builtins.py

@@ -96,77 +96,94 @@ def sorted(iterable, reverse=False, key=None):
     return a
 
 ##### str #####
-def tokenize(s:str) -> list:
-    tokens = []
-    L, R = 0,0
-    
-    mode = None
-    curPos = 0
-    lookingForKword = False
-    
-    while(R<len(s) and not lookingForKword):
-        curChar = s[R]
-        nextChar = s[R+1] if R+1<len(s) else ''
-        
-        # Escaping case
-        if (curChar == '{' and nextChar == '{') or (curChar == '}' and nextChar == '}'):
-            tokens.append(curChar)
-            L = R+2
-            R = R+2
-            continue
+def __f(self:str, *args, **kwargs) -> str:
+    def tokenizeString(s:str):
+        tokens = []
+        L, R = 0,0
         
-        # Regular command line arg case
-        if curChar == '{' and nextChar == '}':
-            
-            if mode is not None and mode != 'auto':
-                raise ValueError("Cannot switch from manual field numbering to automatic field specification")
-            
-            mode = 'auto'
-            # best case {}, just for normal args
-            if(L<R):
-                tokens.append(s[L:R])
-            tokens.append("{"+str(curPos)+"}")
-            curPos+=1
-            L = R+2
-            R = R+2
-            continue
+        mode = None
+        curArg = 0
+        # lookingForKword = False
         
-        # Kwarg case
-        elif (curChar == '{'):
+        while(R<len(s)):
+            curChar = s[R]
+            nextChar = s[R+1] if R+1<len(s) else ''
             
-            if mode is not None and mode != 'manual':
-                raise ValueError("Cannot switch from automatic field specification to manual field numbering")
+            # Invalid case 1: stray '}' encountered, example: "ABCD EFGH {name} IJKL}", "Hello {vv}}", "HELLO {0} WORLD}"
+            if curChar == '}' and nextChar != '}':
+                raise ValueError("Single '}' encountered in format string")        
             
-            mode = 'manual'
+            # Valid Case 1: Escaping case, we escape "{{ or "}}" to be "{" or "}", example: "{{}}", "{{My Name is {0}}}"
+            if (curChar == '{' and nextChar == '{') or (curChar == '}' and nextChar == '}'):
+                
+                if (L<R): # Valid Case 1.1: make sure we are not adding empty string
+                    tokens.append(s[L:R]) # add the string before the escape
+                
+                
+                tokens.append(curChar) # Valid Case 1.2: add the escape char
+                L = R+2 # move the left pointer to the next char
+                R = R+2 # move the right pointer to the next char
+                continue
             
-            if(L<R):
-                tokens.append(s[L:R])
+            # Valid Case 2: Regular command line arg case: example:  "ABCD EFGH {} IJKL", "{}", "HELLO {} WORLD"
+            elif curChar == '{' and nextChar == '}':
+                if mode is not None and mode != 'auto':
+                    # Invalid case 2: mixing automatic and manual field specifications -- example: "ABCD EFGH {name} IJKL {}", "Hello {vv} {}", "HELLO {0} WORLD {}" 
+                    raise ValueError("Cannot switch from manual field numbering to automatic field specification")
+                
+                mode = 'auto'
+                if(L<R): # Valid Case 2.1: make sure we are not adding empty string
+                    tokens.append(s[L:R]) # add the string before the special marker for the arg
+                
+                tokens.append("{"+str(curArg)+"}") # Valid Case 2.2: add the special marker for the arg
+                curArg+=1 # increment the arg position, this will be used for referencing the arg later
+                
+                L = R+2 # move the left pointer to the next char
+                R = R+2 # move the right pointer to the next char
+                continue
             
-            lookingForKword = True
-            kwL = R+1
-            kwR = R+1
-            while(kwR<len(s) and s[kwR]!='}'):
-                kwR += 1
-            tokens.append(s[R:kwR+1])
+            # Valid Case 3: Key-word arg case: example: "ABCD EFGH {name} IJKL", "Hello {vv}", "HELLO {name} WORLD"
+            elif (curChar == '{'):
+                
+                if mode is not None and mode != 'manual':
+                    # # Invalid case 2: mixing automatic and manual field specifications -- example: "ABCD EFGH {} IJKL {name}", "Hello {} {1}", "HELLO {} WORLD {name}"
+                    raise ValueError("Cannot switch from automatic field specification to manual field numbering")
+                
+                mode = 'manual'
+                
+                if(L<R): # Valid case 3.1: make sure we are not adding empty string
+                    tokens.append(s[L:R]) # add the string before the special marker for the arg
+                
+                # We look for the end of the keyword          
+                kwL = R # Keyword left pointer
+                kwR = R+1 # Keyword right pointer
+                while(kwR<len(s) and s[kwR]!='}'):
+                    if s[kwR] == '{': # Invalid case 3: stray '{' encountered, example: "ABCD EFGH {n{ame} IJKL {", "Hello {vv{}}", "HELLO {0} WOR{LD}"
+                        raise ValueError("Unexpected '{' in field name")
+                    kwR += 1
+                
+                # Valid case 3.2: We have successfully found the end of the keyword
+                if kwR<len(s) and s[kwR] == '}':
+                    tokens.append(s[kwL:kwR+1]) # add the special marker for the arg
+                    L = kwR+1
+                    R = kwR+1
+                    
+                # Invalid case 4: We didn't find the end of the keyword, throw error
+                else:
+                    raise ValueError("Expected '}' before end of string")
+                continue
             
-            if kwR<len(s) and s[kwR] == '}':
-                lookingForKword = False
-                L = kwR+1
-                R = kwR+1
-            continue
+            R = R+1
         
-        R = R+1
-    
-    if lookingForKword:
-        raise ValueError("Expected '}' before end of string")
-    
-    if(not lookingForKword and L<R):
-        tokens.append(s[L:R])
+        
+        # Valid case 4: We have reached the end of the string, add the remaining string to the tokens 
+        if L<R:
+            tokens.append(s[L:R])
+                
+        # print(tokens)
+        return tokens
 
-    # print("Looking for kword: ", lookingForKword)
-    return tokens
-def __f(self:str, *args, **kwargs) -> str:
-    tokens = tokenize(self)
+    tokens = tokenizeString(self)
     argMap = {}
     for i, a in enumerate(args):
         argMap[str(i)] = a
@@ -176,7 +193,7 @@ def __f(self:str, *args, **kwargs) -> str:
             key = t[1:-1]
             argMapVal = argMap.get(key, None)
             kwargsVal = kwargs.get(key, None)
-            
+                                    
             if argMapVal is None and kwargsVal is None:
                 raise ValueError("No arg found for token: "+t)
             elif argMapVal is not None:
@@ -187,7 +204,7 @@ def __f(self:str, *args, **kwargs) -> str:
             final_tokens.append(t)
     
     return ''.join(final_tokens)
- 
+
     # if '{}' in self:
     #     for i in range(len(args)):
     #         self = self.replace('{}', str(args[i]), 1)