Explorar el Código

support bytes literal, e.g. `b'\xff\12'`

blueloveTH hace 2 años
padre
commit
9732828e85

+ 1 - 0
include/pocketpy/compiler.h

@@ -85,6 +85,7 @@ class Compiler {
 
     void exprLiteral();
     void exprLong();
+    void exprBytes();
     void exprFString();
     void exprLambda();
     void exprTuple();

+ 7 - 0
include/pocketpy/expr.h

@@ -134,6 +134,13 @@ struct LongExpr: Expr{
     void emit(CodeEmitContext* ctx) override;
 };
 
+struct BytesExpr: Expr{  // expression node for a bytes literal (the lexer's "@bytes" token)
+    Str s;  // literal contents, taken from the token's Str value by Compiler::exprBytes
+    BytesExpr(const Str& s): s(s) {}
+    std::string str() const override { return s.str(); }  // returns the stored contents as std::string
+    void emit(CodeEmitContext* ctx) override;  // defined in src/expr.cpp
+};
+
 // @num, @str which needs to invoke OP_LOAD_CONST
 struct LiteralExpr: Expr{
     TokenValue value;

+ 2 - 2
include/pocketpy/lexer.h

@@ -11,7 +11,7 @@ typedef uint8_t TokenIndex;
 constexpr const char* kTokens[] = {
     "is not", "not in", "yield from",
     "@eof", "@eol", "@sof",
-    "@id", "@num", "@str", "@fstr", "@long",
+    "@id", "@num", "@str", "@fstr", "@long", "@bytes",
     "@indent", "@dedent",
     /*****************************************/
     "+", "+=", "-", "-=",   // (INPLACE_OP - 1) can get '=' removed
@@ -100,7 +100,7 @@ enum Precedence {
   PREC_PRIMARY,
 };
 
-enum StringType { NORMAL_STRING, RAW_STRING, F_STRING };
+enum StringType { NORMAL_STRING, RAW_STRING, F_STRING, NORMAL_BYTES };  // literal kinds passed to Lexer::eat_string; NORMAL_BYTES is used for b'...' / b"..." and yields an "@bytes" token
 
 struct Lexer {
     shared_ptr<SourceData> src;

+ 1 - 0
include/pocketpy/opcodes.h

@@ -39,6 +39,7 @@ OPCODE(DELETE_ATTR)
 OPCODE(DELETE_SUBSCR)
 /**************************/
 OPCODE(BUILD_LONG)
+OPCODE(BUILD_BYTES)
 OPCODE(BUILD_TUPLE)
 OPCODE(BUILD_LIST)
 OPCODE(BUILD_DICT)

+ 6 - 0
src/ceval.cpp

@@ -247,6 +247,12 @@ __NEXT_STEP:;
         if(_0 == nullptr) AttributeError(builtins, m_long);
         TOP() = call(_0, TOP());
     } DISPATCH();
+    TARGET(BUILD_BYTES) {  // replace the Str on top of the stack with a Bytes object holding the same characters
+        const Str& s = CAST(Str&, TOP());  // BytesExpr::emit places this Str via OP_LOAD_CONST just before this opcode
+        std::vector<char> buffer(s.size);
+        memcpy(buffer.data(), s.data, s.size);  // copy all s.size chars of s unchanged
+        TOP() = VAR(Bytes(std::move(buffer)));  // overwrite the top slot in place; s must not be used after this line
+    } DISPATCH();
     TARGET(BUILD_TUPLE)
         _0 = VAR(STACK_VIEW(byte.arg).to_tuple());
         STACK_SHRINK(byte.arg);

+ 5 - 0
src/compiler.cpp

@@ -87,6 +87,7 @@ namespace pkpy{
         rules[TK("@str")] =     { METHOD(exprLiteral),   NO_INFIX };
         rules[TK("@fstr")] =    { METHOD(exprFString),   NO_INFIX };
         rules[TK("@long")] =    { METHOD(exprLong),      NO_INFIX };
+        rules[TK("@bytes")] =   { METHOD(exprBytes),     NO_INFIX };
 #undef METHOD
 #undef NO_INFIX
     }
@@ -159,6 +160,10 @@ namespace pkpy{
         ctx()->s_expr.push(make_expr<LongExpr>(prev().str()));
     }
 
+    void Compiler::exprBytes(){  // handler registered in the rules table for TK("@bytes") (NO_INFIX)
+        ctx()->s_expr.push(make_expr<BytesExpr>(std::get<Str>(prev().value)));  // wrap the Str held by prev().value in a BytesExpr and push it on the expression stack
+    }
+
     void Compiler::exprFString(){
         ctx()->s_expr.push(make_expr<FStringExpr>(std::get<Str>(prev().value)));
     }

+ 6 - 0
src/expr.cpp

@@ -190,6 +190,12 @@ namespace pkpy{
         ctx->emit(OP_BUILD_LONG, BC_NOARG, line);
     }
 
+    void BytesExpr::emit(CodeEmitContext* ctx) {  // emits OP_LOAD_CONST for s, then OP_BUILD_BYTES
+        VM* vm = ctx->vm;  // NOTE(review): not referenced by name below; presumably needed by the VAR macro - confirm
+        ctx->emit(OP_LOAD_CONST, ctx->add_const(VAR(s)), line);  // register s as a constant and load it
+        ctx->emit(OP_BUILD_BYTES, BC_NOARG, line);  // takes no argument; the handler converts the loaded Str into Bytes
+    }
+
     std::string LiteralExpr::str() const{
         if(std::holds_alternative<i64>(value)){
             return std::to_string(std::get<i64>(value));

+ 9 - 2
src/lexer.cpp

@@ -236,9 +236,13 @@ namespace pkpy{
         Str s = eat_string_until(quote, type == RAW_STRING);
         if(type == F_STRING){
             add_token(TK("@fstr"), s);
-        }else{
-            add_token(TK("@str"), s);
+            return;
+        }
+        if(type == NORMAL_BYTES){
+            add_token(TK("@bytes"), s);
+            return;
         }
+        add_token(TK("@str"), s);
     }
 
     void Lexer::eat_number() {
@@ -385,6 +389,9 @@ namespace pkpy{
                     }else if(c == 'r'){
                         if(matchchar('\'')) {eat_string('\'', RAW_STRING); return true;}
                         if(matchchar('"')) {eat_string('"', RAW_STRING); return true;}
+                    }else if(c == 'b'){
+                        if(matchchar('\'')) {eat_string('\'', NORMAL_BYTES); return true;}
+                        if(matchchar('"')) {eat_string('"', NORMAL_BYTES); return true;}
                     }
                     if (c >= '0' && c <= '9') {
                         eat_number();

+ 5 - 0
tests/10_bytes.py

@@ -0,0 +1,5 @@
+a = '12345'
+assert a.encode() == b'12345'  # str.encode() must equal the equivalent bytes literal
+
+assert b'\xff\xee' != b'1234'  # bytes literals with different contents compare unequal
+assert b'\xff\xee' == b'\xff\xee'  # identical literals using \xNN escapes compare equal