Просмотр исходного кода

Merge pull request #154 from stautonico/main

Added support for octal and binary literals
BLUELOVETH 2 года назад
Родитель
Коммит
df4e0e329a
6 изменённых файлов: 114 добавлений и 22 удаления
  1. 5 0
      build.sh
  2. 2 1
      include/pocketpy/lexer.h
  3. 2 1
      prebuild.py
  4. 94 14
      src/lexer.cpp
  5. 3 6
      src/pocketpy.cpp
  6. 8 0
      tests/01_int.py

+ 5 - 0
build.sh

@@ -21,6 +21,11 @@ echo "> Running prebuild.py... "
 
 python3 prebuild.py
 
+if [ $? -ne 0 ]; then
+    echo "prebuild.py failed."
+    exit 1
+fi
+
 SRC=$(find src/ -name "*.cpp")
 
 echo "> Compiling and linking source files... "

+ 2 - 1
include/pocketpy/lexer.h

@@ -48,7 +48,6 @@ const std::map<std::string_view, TokenIndex> kTokenKwMap = [](){
     return map;
 }();
 
-
 struct Token{
   TokenIndex type;
   const char* start;
@@ -139,4 +138,6 @@ struct Lexer {
     std::vector<Token> run();
 };
 
+bool parse_int(std::string_view text, i64* out, int base=10);
+
 } // namespace pkpy

+ 2 - 1
prebuild.py

@@ -4,7 +4,8 @@ from datetime import datetime
 def generate_python_sources():
     sources = {}
     for file in os.listdir("python"):
-        assert file.endswith(".py")
+        if not file.endswith(".py"):
+            continue
         key = file.split(".")[0]
         with open("python/" + file) as f:
             value = f.read()

+ 94 - 14
src/lexer.cpp

@@ -260,7 +260,7 @@ static bool is_unicode_Lo_char(uint32_t c) {
     }
 
     void Lexer::eat_number() {
-        PK_LOCAL_STATIC const std::regex pattern("^(0x)?[0-9a-fA-F]+(\\.[0-9]+)?(L)?");
+        PK_LOCAL_STATIC const std::regex pattern("^(0[xob])?[0-9a-fA-F]+(\\.[0-9]+)?(L)?");
         std::smatch m;
 
         const char* i = token_start;
@@ -278,22 +278,38 @@ static bool is_unicode_Lo_char(uint32_t c) {
         }
 
         if(m[1].matched && m[2].matched){
-            SyntaxError("hex literal should not contain a dot");
+            SyntaxError("binary/hex/octal literal should not contain a dot");
         }
 
-        try{
-            int base = 10;
+        int base = 10;
+        if (m[1].matched) {
+            char tag = m[1].first.base()[1];
+            switch (tag) {
+                case 'x': base = 16; break;
+                case 'o': base = 8; break;
+                case 'b': base = 2; break;
+                default: FATAL_ERROR();
+            }
+        }
+        if (m[2].matched) {
+            // float point number
+            f64 out;
             size_t size;
-            if (m[1].matched) base = 16;
-            if (m[2].matched) {
-                PK_ASSERT(base == 10);
-                add_token(TK("@num"), Number::stof(m[0], &size));
-            } else {
-                add_token(TK("@num"), (i64)std::stoll(m[0], &size, base));
+            try{
+                out = Number::stof(m[0], &size);
+                PK_ASSERT((int)size == (int)m[0].length());
+            }catch(...){
+                SyntaxError("invalid number literal");
+            }
+            add_token(TK("@num"), out);
+        } else {
+            std::string_view text(m[0].first.base(), m[0].length());
+            i64 out;
+            bool ok = parse_int(text, &out, base);
+            if(!ok){
+                SyntaxError("invalid number literal for base " + std::to_string(base));
             }
-            PK_ASSERT((int)size == (int)m.length());
-        }catch(...){
-            SyntaxError("invalid number literal");
+            add_token(TK("@num"), out);
         }
     }
 
@@ -466,4 +482,68 @@ static bool is_unicode_Lo_char(uint32_t c) {
         return std::move(nexts);
     }
 
-}   // namespace pkpy
+bool parse_int(std::string_view text, i64* out, int base){
+  // TODO: detect overflow
+  *out = 0;
+
+  const auto f_startswith_2 = [](std::string_view t, const char* prefix) -> bool{
+    if(t.length() < 2) return false;
+    return t[0] == prefix[0] && t[1] == prefix[1];
+  };
+
+  if(base == 10){
+    // 10-base  12334
+    if(text.length() == 0) return false;
+    for(char c : text){
+      if(c >= '0' && c <= '9'){
+        *out = (*out * 10) + (c - '0');
+      }else{
+        return false;
+      }
+    }
+    return true;
+  }else if(base == 2){
+    // 2-base   0b101010
+    if(f_startswith_2(text, "0b")) text.remove_prefix(2);
+    if(text.length() == 0) return false;
+    for(char c : text){
+      if(c == '0' || c == '1'){
+        *out = (*out << 1) | (c - '0');
+      }else{
+        return false;
+      }
+    }
+    return true;
+  }else if(base == 8){
+    // 8-base   0o123
+    if(f_startswith_2(text, "0o")) text.remove_prefix(2);
+    if(text.length() == 0) return false;
+    for(char c : text){
+      if(c >= '0' && c <= '7'){
+        *out = (*out << 3) | (c - '0');
+      }else{
+        return false;
+      }
+    }
+    return true;
+  }else if(base == 16){
+    // 16-base  0x123
+    if(f_startswith_2(text, "0x")) text.remove_prefix(2);
+    if(text.length() == 0) return false;
+    for(char c : text){
+      if(c >= '0' && c <= '9'){
+        *out = (*out << 4) | (c - '0');
+      }else if(c >= 'a' && c <= 'f'){
+        *out = (*out << 4) | (c - 'a' + 10);
+      }else if(c >= 'A' && c <= 'F'){
+        *out = (*out << 4) | (c - 'A' + 10);
+      }else{
+        return false;
+      }
+    }
+    return true;
+  }
+  return false;
+}
+
+}   // namespace pkpy

+ 3 - 6
src/pocketpy.cpp

@@ -434,14 +434,11 @@ void init_builtins(VM* _vm) {
             int base = 10;
             if(args.size() == 1+2) base = CAST(i64, args[2]);
             const Str& s = CAST(Str&, args[1]);
-            try{
-                size_t parsed = 0;
-                i64 val = std::stoll(s.str(), &parsed, base);
-                PK_ASSERT(parsed == s.length());
-                return VAR(val);
-            }catch(...){
+            i64 val;
+            if(!parse_int(s.sv(), &val, base)){
                 vm->ValueError("invalid literal for int(): " + s.escape());
             }
+            return VAR(val);
         }
         vm->TypeError("invalid arguments for int()");
         return vm->None;

+ 8 - 0
tests/01_int.py

@@ -5,6 +5,14 @@ assert 0x7fffffff == 2147483647
 # test 64-bit
 assert 2**60-1 + 546 - 0xfffffffffffff == 1148417904979477026
 
+# test oct literals
+assert 0o1234 == 668
+assert 0o17777777777 == 2147483647
+
+# test binary literals
+assert 0b10010 == 18
+assert 0b11111111111111111111111111111111 == 4294967295
+
 # test == != >= <= < >
 assert -1 == -1
 assert -1 != 1