blueloveTH 3 лет назад
Родитель
Коммит
343c43c63e
4 изменённых файла: 60 добавлений и 26 удалений
  1. 11 14
      src/compiler.h
  2. 1 1
      src/obj.h
  3. 46 10
      src/parser.h
  4. 2 1
      src/str.h

+ 11 - 14
src/compiler.h

@@ -146,26 +146,23 @@ public:
         while(*i != '\n' && *i != '\0') i++;
         std::string s = std::string(parser->token_start, i);
 
-        size_t* size = new size_t;
-
         try{
             if (std::regex_search(s, m, pattern)) {
                 // here is m.length()-1, since the first char is eaten by lexToken()
                 for(int j=0; j<m.length()-1; j++) parser->eatChar();
 
                 int base = 10;
+                size_t size;
                 if (m[1].matched) base = 16;
                 if (m[2].matched) {
                     if(base == 16) syntaxError("hex literal should not contain a dot");
-                    parser->setNextToken(TK("@num"), vm->PyFloat(std::stod(m[0], size)));
+                    parser->setNextToken(TK("@num"), vm->PyFloat(std::stod(m[0], &size)));
                 } else {
-                    parser->setNextToken(TK("@num"), vm->PyInt(std::stoll(m[0], size, base)));
+                    parser->setNextToken(TK("@num"), vm->PyInt(std::stoll(m[0], &size, base)));
                 }
-                if (*size != m.length()) throw std::runtime_error("length mismatch");
-                delete size;
+                if (size != m.length()) throw std::runtime_error("length mismatch");
             }
         }catch(std::exception& e){
-            delete size;
             syntaxError("invalid number literal");
         } 
     }
@@ -241,15 +238,15 @@ public:
                     return;
                 }
                 default: {
+                    if(c == 'f'){
+                        if(parser->matchChar('\'')) {eatString('\'', true); return;}
+                        if(parser->matchChar('"')) {eatString('"', true); return;}
+                    }
                     if (isdigit(c)) {
                         eatNumber();
-                    } else if (isalpha(c) || c=='_') {
-                        // 可以支持中文编程
-                        if(c == 'f'){
-                            if(parser->matchChar('\'')) {eatString('\'', true); return;}
-                            if(parser->matchChar('"')) {eatString('"', true); return;}
-                        }
-                        parser->eatName();
+                    } else if (parser->isNameStart(c)) {
+                        int ret = parser->eatName();
+                        if(ret!=0) syntaxError("identifier is illegal, err " + std::to_string(ret));
                     } else {
                         syntaxError("unknown character: " + _Str(1, c));
                     }

+ 1 - 1
src/obj.h

@@ -10,7 +10,7 @@ const _Int _Int_MAX_NEG = -9223372036854775807LL;
 const _Float _FLOAT_INF_POS = INFINITY;
 const _Float _FLOAT_INF_NEG = -INFINITY;
 
-#define PK_VERSION "0.2.4"
+#define PK_VERSION "0.2.5"
 
 class CodeObject;
 class BasePointer;

+ 46 - 10
src/parser.h

@@ -175,34 +175,70 @@ struct Parser {
         return c;
     }
 
-    void eatName() {
-        char c = peekChar();
-        while (isalpha(c) || c=='_' || isdigit(c)) {
-            eatChar();
-            c = peekChar();
+    inline bool isNameStart(char c){
+        if(isalpha(c) || c=='_') return true;
+        if(!isascii(c)) return true;
+        return false;
+    }
+
+    int eatName() {
+        current_char--;
+        while(true){
+            uint8_t c = peekChar();
+            //printf("eatName: %d = %c\n", (int)c, c);
+            int u8bytes = 0;
+            if((c & 0b10000000) == 0b00000000) u8bytes = 1;
+            else if((c & 0b11100000) == 0b11000000) u8bytes = 2;
+            else if((c & 0b11110000) == 0b11100000) u8bytes = 3;
+            else if((c & 0b11111000) == 0b11110000) u8bytes = 4;
+            else return 1;
+            std::string u8str(current_char, u8bytes);
+            //printf("%s %d %c\n", u8str.c_str(), u8bytes, c);
+            if(u8str.size() != u8bytes) return 2;
+            if(u8bytes == 1){
+                if(isalpha(c) || c=='_' || isdigit(c)) goto __EAT_ALL_BYTES;
+            }else{
+                uint32_t value = 0;
+                for(int k=0; k < u8bytes; k++){
+                    uint8_t b = u8str[k];
+                    if(k==0){
+                        if(u8bytes == 2) value = (b & 0b00011111) << 6;
+                        else if(u8bytes == 3) value = (b & 0b00001111) << 12;
+                        else if(u8bytes == 4) value = (b & 0b00000111) << 18;
+                    }else{
+                        value |= (b & 0b00111111) << (6*(u8bytes-k-1));
+                    }
+                }
+                // printf("value: %d", value);
+                if(__isLoChar(value)) goto __EAT_ALL_BYTES;
+            }
+            break;
+__EAT_ALL_BYTES:
+            current_char += u8bytes;
         }
 
-        const char* name_start = token_start;
-        int length = (int)(current_char - name_start);
-        std::string_view name(name_start, length);
+        int length = (int)(current_char - token_start);
+        if(length == 0) return 3;
+        std::string_view name(token_start, length);
         if(__KW_MAP.count(name)){
             if(name == "not"){
                 if(strncmp(current_char, " in", 3) == 0){
                     current_char += 3;
                     setNextToken(TK("not in"));
-                    return;
+                    return 0;
                 }
             }else if(name == "is"){
                 if(strncmp(current_char, " not", 4) == 0){
                     current_char += 4;
                     setNextToken(TK("is not"));
-                    return;
+                    return 0;
                 }
             }
             setNextToken(__KW_MAP.at(name));
         } else {
             setNextToken(TK("@id"));
         }
+        return 0;
     }
 
     void skipLineComment() {

Разница между файлами не показана из-за своего большого размера
+ 2 - 1
src/str.h


Некоторые файлы не были показаны из-за большого количества измененных файлов