compiler.cpp 55 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428
  1. #include "pocketpy/compiler.h"
  2. namespace pkpy{
// Pratt parsing table indexed by token type; its entries are assigned in Compiler::init_pratt_rules().
PrattRule Compiler::rules[kTokenCount];
  4. NameScope Compiler::name_scope() const {
  5. auto s = contexts.size()>1 ? NAME_LOCAL : NAME_GLOBAL;
  6. if(unknown_global_scope && s == NAME_GLOBAL) s = NAME_GLOBAL_UNKNOWN;
  7. return s;
  8. }
  9. CodeObject_ Compiler::push_global_context(){
  10. CodeObject_ co = std::make_shared<CodeObject>(lexer.src, lexer.src->filename);
  11. co->start_line = i==0 ? 1 : prev().line;
  12. contexts.push(CodeEmitContext(vm, co, contexts.size()));
  13. return co;
  14. }
  15. FuncDecl_ Compiler::push_f_context(Str name){
  16. FuncDecl_ decl = std::make_shared<FuncDecl>();
  17. decl->code = std::make_shared<CodeObject>(lexer.src, name);
  18. decl->code->start_line = i==0 ? 1 : prev().line;
  19. decl->nested = name_scope() == NAME_LOCAL;
  20. contexts.push(CodeEmitContext(vm, decl->code, contexts.size()));
  21. contexts.top().func = decl;
  22. return decl;
  23. }
  24. void Compiler::pop_context(){
  25. if(!ctx()->s_expr.empty()){
  26. throw std::runtime_error("!ctx()->s_expr.empty()");
  27. }
  28. // add a `return None` in the end as a guard
  29. // previously, we only do this if the last opcode is not a return
  30. // however, this is buggy...since there may be a jump to the end (out of bound) even if the last opcode is a return
  31. ctx()->emit_(OP_RETURN_VALUE, 1, BC_KEEPLINE, true);
  32. // find the last valid token
  33. int j = i-1;
  34. while(tokens[j].type == TK("@eol") || tokens[j].type == TK("@dedent") || tokens[j].type == TK("@eof")) j--;
  35. ctx()->co->end_line = tokens[j].line;
  36. // some check here
  37. auto& codes = ctx()->co->codes;
  38. if(ctx()->co->varnames.size() > PK_MAX_CO_VARNAMES){
  39. SyntaxError("maximum number of local variables exceeded");
  40. }
  41. if(ctx()->co->consts.size() > 65530){
  42. SyntaxError("maximum number of constants exceeded");
  43. }
  44. if(codes.size() > 65530 && ctx()->co->src->mode != JSON_MODE){
  45. // json mode does not contain jump instructions, so it is safe to ignore this check
  46. SyntaxError("maximum number of opcodes exceeded");
  47. }
  48. // pre-compute LOOP_BREAK and LOOP_CONTINUE
  49. for(int i=0; i<codes.size(); i++){
  50. Bytecode& bc = codes[i];
  51. if(bc.op == OP_LOOP_CONTINUE){
  52. bc.arg = ctx()->co->blocks[bc.arg].start;
  53. }else if(bc.op == OP_LOOP_BREAK){
  54. bc.arg = ctx()->co->blocks[bc.arg].get_break_end();
  55. }
  56. }
  57. // pre-compute func->is_simple
  58. FuncDecl_ func = contexts.top().func;
  59. if(func){
  60. // check generator
  61. for(Bytecode bc: func->code->codes){
  62. if(bc.op == OP_YIELD_VALUE || bc.op == OP_FOR_ITER_YIELD_VALUE){
  63. func->type = FuncType::GENERATOR;
  64. for(Bytecode bc: func->code->codes){
  65. if(bc.op == OP_RETURN_VALUE && bc.arg == BC_NOARG){
  66. SyntaxError("'return' with argument inside generator function");
  67. }
  68. }
  69. break;
  70. }
  71. }
  72. if(func->type == FuncType::UNSET){
  73. bool is_simple = true;
  74. if(func->kwargs.size() > 0) is_simple = false;
  75. if(func->starred_arg >= 0) is_simple = false;
  76. if(func->starred_kwarg >= 0) is_simple = false;
  77. if(is_simple){
  78. func->type = FuncType::SIMPLE;
  79. bool is_empty = false;
  80. if(func->code->codes.size() == 1){
  81. Bytecode bc = func->code->codes[0];
  82. if(bc.op == OP_RETURN_VALUE && bc.arg == 1){
  83. is_empty = true;
  84. }
  85. }
  86. if(is_empty) func->type = FuncType::EMPTY;
  87. }
  88. else func->type = FuncType::NORMAL;
  89. }
  90. PK_ASSERT(func->type != FuncType::UNSET);
  91. }
  92. contexts.pop();
  93. }
  94. void Compiler::init_pratt_rules(){
  95. PK_LOCAL_STATIC bool initialized = false;
  96. if(initialized) return;
  97. initialized = true;
  98. // http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
  99. #define PK_METHOD(name) &Compiler::name
  100. #define PK_NO_INFIX nullptr, PREC_LOWEST
  101. for(TokenIndex i=0; i<kTokenCount; i++) rules[i] = { nullptr, PK_NO_INFIX };
  102. rules[TK(".")] = { nullptr, PK_METHOD(exprAttrib), PREC_PRIMARY };
  103. rules[TK("(")] = { PK_METHOD(exprGroup), PK_METHOD(exprCall), PREC_PRIMARY };
  104. rules[TK("[")] = { PK_METHOD(exprList), PK_METHOD(exprSubscr), PREC_PRIMARY };
  105. rules[TK("{")] = { PK_METHOD(exprMap), PK_NO_INFIX };
  106. rules[TK("%")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_FACTOR };
  107. rules[TK("+")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_TERM };
  108. rules[TK("-")] = { PK_METHOD(exprUnaryOp), PK_METHOD(exprBinaryOp), PREC_TERM };
  109. rules[TK("*")] = { PK_METHOD(exprUnaryOp), PK_METHOD(exprBinaryOp), PREC_FACTOR };
  110. rules[TK("~")] = { PK_METHOD(exprUnaryOp), nullptr, PREC_UNARY };
  111. rules[TK("/")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_FACTOR };
  112. rules[TK("//")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_FACTOR };
  113. rules[TK("**")] = { PK_METHOD(exprUnaryOp), PK_METHOD(exprBinaryOp), PREC_EXPONENT };
  114. rules[TK(">")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
  115. rules[TK("<")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
  116. rules[TK("==")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
  117. rules[TK("!=")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
  118. rules[TK(">=")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
  119. rules[TK("<=")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
  120. rules[TK("in")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
  121. rules[TK("is")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
  122. rules[TK("<<")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  123. rules[TK(">>")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  124. rules[TK("&")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_AND };
  125. rules[TK("|")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_OR };
  126. rules[TK("^")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_XOR };
  127. rules[TK("@")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_FACTOR };
  128. rules[TK("if")] = { nullptr, PK_METHOD(exprTernary), PREC_TERNARY };
  129. rules[TK("not in")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
  130. rules[TK("is not")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
  131. rules[TK("and") ] = { nullptr, PK_METHOD(exprAnd), PREC_LOGICAL_AND };
  132. rules[TK("or")] = { nullptr, PK_METHOD(exprOr), PREC_LOGICAL_OR };
  133. rules[TK("not")] = { PK_METHOD(exprNot), nullptr, PREC_LOGICAL_NOT };
  134. rules[TK("True")] = { PK_METHOD(exprLiteral0), PK_NO_INFIX };
  135. rules[TK("False")] = { PK_METHOD(exprLiteral0), PK_NO_INFIX };
  136. rules[TK("None")] = { PK_METHOD(exprLiteral0), PK_NO_INFIX };
  137. rules[TK("...")] = { PK_METHOD(exprLiteral0), PK_NO_INFIX };
  138. rules[TK("lambda")] = { PK_METHOD(exprLambda), PK_NO_INFIX };
  139. rules[TK("@id")] = { PK_METHOD(exprName), PK_NO_INFIX };
  140. rules[TK("@num")] = { PK_METHOD(exprLiteral), PK_NO_INFIX };
  141. rules[TK("@str")] = { PK_METHOD(exprLiteral), PK_NO_INFIX };
  142. rules[TK("@fstr")] = { PK_METHOD(exprFString), PK_NO_INFIX };
  143. rules[TK("@long")] = { PK_METHOD(exprLong), PK_NO_INFIX };
  144. rules[TK("@imag")] = { PK_METHOD(exprImag), PK_NO_INFIX };
  145. rules[TK("@bytes")] = { PK_METHOD(exprBytes), PK_NO_INFIX };
  146. rules[TK(":")] = { PK_METHOD(exprSlice0), PK_METHOD(exprSlice1), PREC_PRIMARY };
  147. #undef PK_METHOD
  148. #undef PK_NO_INFIX
  149. }
  150. bool Compiler::match(TokenIndex expected) {
  151. if (curr().type != expected) return false;
  152. advance();
  153. return true;
  154. }
  155. void Compiler::consume(TokenIndex expected) {
  156. if (!match(expected)){
  157. SyntaxError(
  158. _S("expected '", TK_STR(expected), "', got '", TK_STR(curr().type), "'")
  159. );
  160. }
  161. }
  162. bool Compiler::match_newlines_repl(){
  163. return match_newlines(mode()==REPL_MODE);
  164. }
  165. bool Compiler::match_newlines(bool repl_throw) {
  166. bool consumed = false;
  167. if (curr().type == TK("@eol")) {
  168. while (curr().type == TK("@eol")) advance();
  169. consumed = true;
  170. }
  171. if (repl_throw && curr().type == TK("@eof")){
  172. throw NeedMoreLines(ctx()->is_compiling_class);
  173. }
  174. return consumed;
  175. }
  176. bool Compiler::match_end_stmt() {
  177. if (match(TK(";"))) { match_newlines(); return true; }
  178. if (match_newlines() || curr().type == TK("@eof")) return true;
  179. if (curr().type == TK("@dedent")) return true;
  180. return false;
  181. }
  182. void Compiler::consume_end_stmt() {
  183. if (!match_end_stmt()) SyntaxError("expected statement end");
  184. }
  185. void Compiler::EXPR() {
  186. parse_expression(PREC_LOWEST+1);
  187. }
  188. void Compiler::EXPR_TUPLE(bool allow_slice) {
  189. parse_expression(PREC_LOWEST+1, allow_slice);
  190. if(!match(TK(","))) return;
  191. // tuple expression
  192. Expr_vector items;
  193. items.push_back(ctx()->s_expr.popx());
  194. do {
  195. if(curr().brackets_level) match_newlines_repl();
  196. if(!is_expression(allow_slice)) break;
  197. parse_expression(PREC_LOWEST+1, allow_slice);
  198. items.push_back(ctx()->s_expr.popx());
  199. if(curr().brackets_level) match_newlines_repl();
  200. } while(match(TK(",")));
  201. ctx()->s_expr.push(make_expr<TupleExpr>(std::move(items)));
  202. }
  203. // special case for `for loop` and `comp`
  204. Expr_ Compiler::EXPR_VARS(){
  205. Expr_vector items;
  206. do {
  207. consume(TK("@id"));
  208. items.push_back(make_expr<NameExpr>(prev().str(), name_scope()));
  209. } while(match(TK(",")));
  210. if(items.size()==1) return std::move(items[0]);
  211. return make_expr<TupleExpr>(std::move(items));
  212. }
  213. void Compiler::exprLiteral(){
  214. ctx()->s_expr.push(make_expr<LiteralExpr>(prev().value));
  215. }
  216. void Compiler::exprLong(){
  217. ctx()->s_expr.push(make_expr<LongExpr>(prev().str()));
  218. }
  219. void Compiler::exprImag(){
  220. ctx()->s_expr.push(make_expr<ImagExpr>(std::get<f64>(prev().value)));
  221. }
  222. void Compiler::exprBytes(){
  223. ctx()->s_expr.push(make_expr<BytesExpr>(std::get<Str>(prev().value)));
  224. }
  225. void Compiler::exprFString(){
  226. ctx()->s_expr.push(make_expr<FStringExpr>(std::get<Str>(prev().value)));
  227. }
  228. void Compiler::exprLambda(){
  229. FuncDecl_ decl = push_f_context("<lambda>");
  230. auto e = make_expr<LambdaExpr>(decl);
  231. if(!match(TK(":"))){
  232. _compile_f_args(e->decl, false);
  233. consume(TK(":"));
  234. }
  235. // https://github.com/pocketpy/pocketpy/issues/37
  236. parse_expression(PREC_LAMBDA + 1);
  237. ctx()->emit_expr();
  238. ctx()->emit_(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  239. pop_context();
  240. ctx()->s_expr.push(std::move(e));
  241. }
  242. void Compiler::exprOr(){
  243. auto e = make_expr<OrExpr>();
  244. e->lhs = ctx()->s_expr.popx();
  245. parse_expression(PREC_LOGICAL_OR + 1);
  246. e->rhs = ctx()->s_expr.popx();
  247. ctx()->s_expr.push(std::move(e));
  248. }
  249. void Compiler::exprAnd(){
  250. auto e = make_expr<AndExpr>();
  251. e->lhs = ctx()->s_expr.popx();
  252. parse_expression(PREC_LOGICAL_AND + 1);
  253. e->rhs = ctx()->s_expr.popx();
  254. ctx()->s_expr.push(std::move(e));
  255. }
  256. void Compiler::exprTernary(){
  257. auto e = make_expr<TernaryExpr>();
  258. e->true_expr = ctx()->s_expr.popx();
  259. // cond
  260. parse_expression(PREC_TERNARY + 1);
  261. e->cond = ctx()->s_expr.popx();
  262. consume(TK("else"));
  263. // if false
  264. parse_expression(PREC_TERNARY + 1);
  265. e->false_expr = ctx()->s_expr.popx();
  266. ctx()->s_expr.push(std::move(e));
  267. }
  268. void Compiler::exprBinaryOp(){
  269. auto e = make_expr<BinaryExpr>();
  270. e->op = prev().type;
  271. e->lhs = ctx()->s_expr.popx();
  272. parse_expression(rules[e->op].precedence + 1);
  273. e->rhs = ctx()->s_expr.popx();
  274. ctx()->s_expr.push(std::move(e));
  275. }
  276. void Compiler::exprNot() {
  277. parse_expression(PREC_LOGICAL_NOT + 1);
  278. ctx()->s_expr.push(make_expr<NotExpr>(ctx()->s_expr.popx()));
  279. }
  280. void Compiler::exprUnaryOp(){
  281. TokenIndex op = prev().type;
  282. parse_expression(PREC_UNARY + 1);
  283. switch(op){
  284. case TK("-"):
  285. ctx()->s_expr.push(make_expr<NegatedExpr>(ctx()->s_expr.popx()));
  286. break;
  287. case TK("~"):
  288. ctx()->s_expr.push(make_expr<InvertExpr>(ctx()->s_expr.popx()));
  289. break;
  290. case TK("*"):
  291. ctx()->s_expr.push(make_expr<StarredExpr>(1, ctx()->s_expr.popx()));
  292. break;
  293. case TK("**"):
  294. ctx()->s_expr.push(make_expr<StarredExpr>(2, ctx()->s_expr.popx()));
  295. break;
  296. default: PK_FATAL_ERROR();
  297. }
  298. }
  299. void Compiler::exprGroup(){
  300. match_newlines_repl();
  301. EXPR_TUPLE(); // () is just for change precedence
  302. match_newlines_repl();
  303. consume(TK(")"));
  304. if(ctx()->s_expr.top()->is_tuple()) return;
  305. Expr_ g = make_expr<GroupedExpr>(ctx()->s_expr.popx());
  306. ctx()->s_expr.push(std::move(g));
  307. }
  308. void Compiler::consume_comp(unique_ptr_128<CompExpr> ce, Expr_ expr){
  309. ce->expr = std::move(expr);
  310. ce->vars = EXPR_VARS();
  311. consume(TK("in"));
  312. parse_expression(PREC_TERNARY + 1);
  313. ce->iter = ctx()->s_expr.popx();
  314. match_newlines_repl();
  315. if(match(TK("if"))){
  316. parse_expression(PREC_TERNARY + 1);
  317. ce->cond = ctx()->s_expr.popx();
  318. }
  319. ctx()->s_expr.push(std::move(ce));
  320. match_newlines_repl();
  321. }
  322. void Compiler::exprList() {
  323. int line = prev().line;
  324. Expr_vector items;
  325. do {
  326. match_newlines_repl();
  327. if (curr().type == TK("]")) break;
  328. EXPR();
  329. items.push_back(ctx()->s_expr.popx());
  330. match_newlines_repl();
  331. if(items.size()==1 && match(TK("for"))){
  332. consume_comp(make_expr<ListCompExpr>(), std::move(items[0]));
  333. consume(TK("]"));
  334. return;
  335. }
  336. match_newlines_repl();
  337. } while (match(TK(",")));
  338. consume(TK("]"));
  339. auto e = make_expr<ListExpr>(std::move(items));
  340. e->line = line; // override line
  341. ctx()->s_expr.push(std::move(e));
  342. }
  343. void Compiler::exprMap() {
  344. bool parsing_dict = false; // {...} may be dict or set
  345. Expr_vector items;
  346. do {
  347. match_newlines_repl();
  348. if (curr().type == TK("}")) break;
  349. EXPR();
  350. int star_level = ctx()->s_expr.top()->star_level();
  351. if(star_level==2 || curr().type == TK(":")){
  352. parsing_dict = true;
  353. }
  354. if(parsing_dict){
  355. auto dict_item = make_expr<DictItemExpr>();
  356. if(star_level == 2){
  357. dict_item->key = nullptr;
  358. dict_item->value = ctx()->s_expr.popx();
  359. }else{
  360. consume(TK(":"));
  361. EXPR();
  362. dict_item->key = ctx()->s_expr.popx();
  363. dict_item->value = ctx()->s_expr.popx();
  364. }
  365. items.push_back(std::move(dict_item));
  366. }else{
  367. items.push_back(ctx()->s_expr.popx());
  368. }
  369. match_newlines_repl();
  370. if(items.size()==1 && match(TK("for"))){
  371. if(parsing_dict) consume_comp(make_expr<DictCompExpr>(), std::move(items[0]));
  372. else consume_comp(make_expr<SetCompExpr>(), std::move(items[0]));
  373. consume(TK("}"));
  374. return;
  375. }
  376. match_newlines_repl();
  377. } while (match(TK(",")));
  378. consume(TK("}"));
  379. if(items.size()==0 || parsing_dict){
  380. auto e = make_expr<DictExpr>(std::move(items));
  381. ctx()->s_expr.push(std::move(e));
  382. }else{
  383. auto e = make_expr<SetExpr>(std::move(items));
  384. ctx()->s_expr.push(std::move(e));
  385. }
  386. }
  387. void Compiler::exprCall() {
  388. auto e = make_expr<CallExpr>();
  389. e->callable = ctx()->s_expr.popx();
  390. do {
  391. match_newlines_repl();
  392. if (curr().type==TK(")")) break;
  393. if(curr().type==TK("@id") && next().type==TK("=")) {
  394. consume(TK("@id"));
  395. Str key = prev().str();
  396. consume(TK("="));
  397. EXPR();
  398. e->kwargs.push_back({key, ctx()->s_expr.popx()});
  399. } else{
  400. EXPR();
  401. if(ctx()->s_expr.top()->star_level() == 2){
  402. // **kwargs
  403. e->kwargs.push_back({"**", ctx()->s_expr.popx()});
  404. }else{
  405. // positional argument
  406. if(!e->kwargs.empty()) SyntaxError("positional argument follows keyword argument");
  407. e->args.push_back(ctx()->s_expr.popx());
  408. }
  409. }
  410. match_newlines_repl();
  411. } while (match(TK(",")));
  412. consume(TK(")"));
  413. if(e->args.size() > 32767) SyntaxError("too many positional arguments");
  414. if(e->kwargs.size() > 32767) SyntaxError("too many keyword arguments");
  415. ctx()->s_expr.push(std::move(e));
  416. }
  417. void Compiler::exprName(){
  418. Str name = prev().str();
  419. NameScope scope = name_scope();
  420. if(ctx()->global_names.count(name)){
  421. scope = NAME_GLOBAL;
  422. }
  423. ctx()->s_expr.push(make_expr<NameExpr>(name, scope));
  424. }
  425. void Compiler::exprAttrib() {
  426. consume(TK("@id"));
  427. ctx()->s_expr.push(
  428. make_expr<AttribExpr>(ctx()->s_expr.popx(), StrName::get(prev().sv()))
  429. );
  430. }
  431. void Compiler::exprSlice0() {
  432. auto slice = make_expr<SliceExpr>();
  433. if(is_expression()){ // :<stop>
  434. EXPR();
  435. slice->stop = ctx()->s_expr.popx();
  436. // try optional step
  437. if(match(TK(":"))){ // :<stop>:<step>
  438. EXPR();
  439. slice->step = ctx()->s_expr.popx();
  440. }
  441. }else if(match(TK(":"))){
  442. if(is_expression()){ // ::<step>
  443. EXPR();
  444. slice->step = ctx()->s_expr.popx();
  445. } // else ::
  446. } // else :
  447. ctx()->s_expr.push(std::move(slice));
  448. }
  449. void Compiler::exprSlice1() {
  450. auto slice = make_expr<SliceExpr>();
  451. slice->start = ctx()->s_expr.popx();
  452. if(is_expression()){ // <start>:<stop>
  453. EXPR();
  454. slice->stop = ctx()->s_expr.popx();
  455. // try optional step
  456. if(match(TK(":"))){ // <start>:<stop>:<step>
  457. EXPR();
  458. slice->step = ctx()->s_expr.popx();
  459. }
  460. }else if(match(TK(":"))){ // <start>::<step>
  461. EXPR();
  462. slice->step = ctx()->s_expr.popx();
  463. } // else <start>:
  464. ctx()->s_expr.push(std::move(slice));
  465. }
  466. void Compiler::exprSubscr() {
  467. auto e = make_expr<SubscrExpr>();
  468. match_newlines_repl();
  469. e->a = ctx()->s_expr.popx(); // a
  470. EXPR_TUPLE(true);
  471. e->b = ctx()->s_expr.popx(); // a[<expr>]
  472. match_newlines_repl();
  473. consume(TK("]"));
  474. ctx()->s_expr.push(std::move(e));
  475. }
  476. void Compiler::exprLiteral0() {
  477. ctx()->s_expr.push(make_expr<Literal0Expr>(prev().type));
  478. }
  479. void Compiler::compile_block_body(void (Compiler::*callback)()) {
  480. if(callback == nullptr) callback = &Compiler::compile_stmt;
  481. consume(TK(":"));
  482. if(curr().type!=TK("@eol") && curr().type!=TK("@eof")){
  483. while(true){
  484. compile_stmt();
  485. bool possible = curr().type!=TK("@eol") && curr().type!=TK("@eof");
  486. if(prev().type != TK(";") || !possible) break;
  487. }
  488. return;
  489. }
  490. if(!match_newlines(mode()==REPL_MODE)){
  491. SyntaxError("expected a new line after ':'");
  492. }
  493. consume(TK("@indent"));
  494. while (curr().type != TK("@dedent")) {
  495. match_newlines();
  496. (this->*callback)();
  497. match_newlines();
  498. }
  499. consume(TK("@dedent"));
  500. }
  501. // import a [as b]
  502. // import a [as b], c [as d]
  503. void Compiler::compile_normal_import() {
  504. do {
  505. consume(TK("@id"));
  506. Str name = prev().str();
  507. ctx()->emit_(OP_IMPORT_PATH, ctx()->add_const_string(name.sv()), prev().line);
  508. if (match(TK("as"))) {
  509. consume(TK("@id"));
  510. name = prev().str();
  511. }
  512. ctx()->emit_store_name(name_scope(), StrName(name), prev().line);
  513. } while (match(TK(",")));
  514. consume_end_stmt();
  515. }
  516. // from a import b [as c], d [as e]
  517. // from a.b import c [as d]
  518. // from . import a [as b]
  519. // from .a import b [as c]
  520. // from ..a import b [as c]
  521. // from .a.b import c [as d]
  522. // from xxx import *
  523. void Compiler::compile_from_import() {
  524. int dots = 0;
  525. while(true){
  526. switch(curr().type){
  527. case TK("."): dots+=1; break;
  528. case TK(".."): dots+=2; break;
  529. case TK("..."): dots+=3; break;
  530. default: goto __EAT_DOTS_END;
  531. }
  532. advance();
  533. }
  534. __EAT_DOTS_END:
  535. SStream ss;
  536. for(int i=0; i<dots; i++) ss << '.';
  537. if(dots > 0){
  538. // @id is optional if dots > 0
  539. if(match(TK("@id"))){
  540. ss << prev().sv();
  541. while (match(TK("."))) {
  542. consume(TK("@id"));
  543. ss << "." << prev().sv();
  544. }
  545. }
  546. }else{
  547. // @id is required if dots == 0
  548. consume(TK("@id"));
  549. ss << prev().sv();
  550. while (match(TK("."))) {
  551. consume(TK("@id"));
  552. ss << "." << prev().sv();
  553. }
  554. }
  555. ctx()->emit_(OP_IMPORT_PATH, ctx()->add_const_string(ss.str().sv()), prev().line);
  556. consume(TK("import"));
  557. if (match(TK("*"))) {
  558. if(name_scope() != NAME_GLOBAL) SyntaxError("from <module> import * can only be used in global scope");
  559. // pop the module and import __all__
  560. ctx()->emit_(OP_POP_IMPORT_STAR, BC_NOARG, prev().line);
  561. consume_end_stmt();
  562. return;
  563. }
  564. do {
  565. ctx()->emit_(OP_DUP_TOP, BC_NOARG, BC_KEEPLINE);
  566. consume(TK("@id"));
  567. Str name = prev().str();
  568. ctx()->emit_(OP_LOAD_ATTR, StrName(name).index, prev().line);
  569. if (match(TK("as"))) {
  570. consume(TK("@id"));
  571. name = prev().str();
  572. }
  573. ctx()->emit_store_name(name_scope(), StrName(name), prev().line);
  574. } while (match(TK(",")));
  575. ctx()->emit_(OP_POP_TOP, BC_NOARG, BC_KEEPLINE);
  576. consume_end_stmt();
  577. }
  578. bool Compiler::is_expression(bool allow_slice){
  579. PrattCallback prefix = rules[curr().type].prefix;
  580. return prefix != nullptr && (allow_slice || curr().type!=TK(":"));
  581. }
  582. void Compiler::parse_expression(int precedence, bool allow_slice) {
  583. PrattCallback prefix = rules[curr().type].prefix;
  584. if (prefix==nullptr || (curr().type==TK(":") && !allow_slice)){
  585. SyntaxError(Str("expected an expression, got ") + TK_STR(curr().type));
  586. }
  587. advance();
  588. (this->*prefix)();
  589. while (rules[curr().type].precedence >= precedence && (allow_slice || curr().type!=TK(":"))) {
  590. TokenIndex op = curr().type;
  591. advance();
  592. PrattCallback infix = rules[op].infix;
  593. PK_ASSERT(infix != nullptr);
  594. (this->*infix)();
  595. }
  596. }
  597. void Compiler::compile_if_stmt() {
  598. EXPR(); // condition
  599. ctx()->emit_expr();
  600. int patch = ctx()->emit_(OP_POP_JUMP_IF_FALSE, BC_NOARG, prev().line);
  601. compile_block_body();
  602. if (match(TK("elif"))) {
  603. int exit_patch = ctx()->emit_(OP_JUMP_ABSOLUTE, BC_NOARG, prev().line);
  604. ctx()->patch_jump(patch);
  605. compile_if_stmt();
  606. ctx()->patch_jump(exit_patch);
  607. } else if (match(TK("else"))) {
  608. int exit_patch = ctx()->emit_(OP_JUMP_ABSOLUTE, BC_NOARG, prev().line);
  609. ctx()->patch_jump(patch);
  610. compile_block_body();
  611. ctx()->patch_jump(exit_patch);
  612. } else {
  613. ctx()->patch_jump(patch);
  614. }
  615. }
  616. void Compiler::compile_while_loop() {
  617. CodeBlock* block = ctx()->enter_block(CodeBlockType::WHILE_LOOP);
  618. EXPR(); // condition
  619. ctx()->emit_expr();
  620. int patch = ctx()->emit_(OP_POP_JUMP_IF_FALSE, BC_NOARG, prev().line);
  621. compile_block_body();
  622. ctx()->emit_(OP_LOOP_CONTINUE, ctx()->get_loop(), BC_KEEPLINE, true);
  623. ctx()->patch_jump(patch);
  624. ctx()->exit_block();
  625. // optional else clause
  626. if (match(TK("else"))) {
  627. compile_block_body();
  628. block->end2 = ctx()->co->codes.size();
  629. }
  630. }
  631. void Compiler::compile_for_loop() {
  632. Expr_ vars = EXPR_VARS();
  633. consume(TK("in"));
  634. EXPR_TUPLE(); ctx()->emit_expr();
  635. ctx()->emit_(OP_GET_ITER, BC_NOARG, BC_KEEPLINE);
  636. CodeBlock* block = ctx()->enter_block(CodeBlockType::FOR_LOOP);
  637. int for_codei = ctx()->emit_(OP_FOR_ITER, BC_NOARG, BC_KEEPLINE);
  638. bool ok = vars->emit_store(ctx());
  639. if(!ok) SyntaxError(); // this error occurs in `vars` instead of this line, but...nevermind
  640. ctx()->try_merge_for_iter_store(for_codei);
  641. compile_block_body();
  642. ctx()->emit_(OP_LOOP_CONTINUE, ctx()->get_loop(), BC_KEEPLINE, true);
  643. ctx()->exit_block();
  644. // optional else clause
  645. if (match(TK("else"))) {
  646. compile_block_body();
  647. block->end2 = ctx()->co->codes.size();
  648. }
  649. }
  650. void Compiler::compile_try_except() {
  651. ctx()->enter_block(CodeBlockType::TRY_EXCEPT);
  652. compile_block_body();
  653. small_vector_2<int, 6> patches;
  654. patches.push_back(
  655. ctx()->emit_(OP_JUMP_ABSOLUTE, BC_NOARG, BC_KEEPLINE)
  656. );
  657. ctx()->exit_block();
  658. int finally_entry = -1;
  659. if(curr().type != TK("finally")){
  660. do {
  661. StrName as_name;
  662. consume(TK("except"));
  663. if(is_expression()){
  664. EXPR(); // push assumed type on to the stack
  665. ctx()->emit_expr();
  666. ctx()->emit_(OP_EXCEPTION_MATCH, BC_NOARG, prev().line);
  667. if(match(TK("as"))){
  668. consume(TK("@id"));
  669. as_name = StrName(prev().sv());
  670. }
  671. }else{
  672. ctx()->emit_(OP_LOAD_TRUE, BC_NOARG, BC_KEEPLINE);
  673. }
  674. int patch = ctx()->emit_(OP_POP_JUMP_IF_FALSE, BC_NOARG, BC_KEEPLINE);
  675. // on match
  676. if(!as_name.empty()){
  677. ctx()->emit_(OP_DUP_TOP, BC_NOARG, BC_KEEPLINE);
  678. ctx()->emit_store_name(name_scope(), as_name, BC_KEEPLINE);
  679. }
  680. // pop the exception
  681. ctx()->emit_(OP_POP_EXCEPTION, BC_NOARG, BC_KEEPLINE);
  682. compile_block_body();
  683. patches.push_back(ctx()->emit_(OP_JUMP_ABSOLUTE, BC_NOARG, BC_KEEPLINE));
  684. ctx()->patch_jump(patch);
  685. }while(curr().type == TK("except"));
  686. }
  687. if(match(TK("finally"))){
  688. int patch = ctx()->emit_(OP_JUMP_ABSOLUTE, BC_NOARG, BC_KEEPLINE);
  689. finally_entry = ctx()->co->codes.size();
  690. compile_block_body();
  691. ctx()->emit_(OP_JUMP_ABSOLUTE_TOP, BC_NOARG, BC_KEEPLINE);
  692. ctx()->patch_jump(patch);
  693. }
  694. // no match, re-raise
  695. if(finally_entry != -1){
  696. i64 target = ctx()->co->codes.size()+2;
  697. ctx()->emit_(OP_LOAD_CONST, ctx()->add_const(VAR(target)), BC_KEEPLINE);
  698. ctx()->emit_(OP_JUMP_ABSOLUTE, finally_entry, BC_KEEPLINE);
  699. }
  700. ctx()->emit_(OP_RE_RAISE, BC_NOARG, BC_KEEPLINE);
  701. // no exception or no match, jump to the end
  702. for (int patch : patches) ctx()->patch_jump(patch);
  703. if(finally_entry != -1){
  704. i64 target = ctx()->co->codes.size()+2;
  705. ctx()->emit_(OP_LOAD_CONST, ctx()->add_const(VAR(target)), BC_KEEPLINE);
  706. ctx()->emit_(OP_JUMP_ABSOLUTE, finally_entry, BC_KEEPLINE);
  707. }
  708. }
  709. void Compiler::compile_decorated(){
  710. Expr_vector decorators;
  711. do{
  712. EXPR();
  713. decorators.push_back(ctx()->s_expr.popx());
  714. if(!match_newlines_repl()) SyntaxError();
  715. }while(match(TK("@")));
  716. if(match(TK("class"))){
  717. compile_class(decorators);
  718. }else{
  719. consume(TK("def"));
  720. compile_function(decorators);
  721. }
  722. }
  723. bool Compiler::try_compile_assignment(){
  724. switch (curr().type) {
  725. case TK("+="): case TK("-="): case TK("*="): case TK("/="): case TK("//="): case TK("%="):
  726. case TK("<<="): case TK(">>="): case TK("&="): case TK("|="): case TK("^="): {
  727. Expr* lhs_p = ctx()->s_expr.top().get();
  728. if(lhs_p->is_starred()) SyntaxError();
  729. if(ctx()->is_compiling_class) SyntaxError("can't use inplace operator in class definition");
  730. advance();
  731. auto e = make_expr<BinaryExpr>();
  732. e->op = prev().type - 1; // -1 to remove =
  733. e->lhs = ctx()->s_expr.popx();
  734. EXPR_TUPLE();
  735. e->rhs = ctx()->s_expr.popx();
  736. if(e->is_starred()) SyntaxError();
  737. e->emit_(ctx());
  738. bool ok = lhs_p->emit_store(ctx());
  739. if(!ok) SyntaxError();
  740. } return true;
  741. case TK("="): {
  742. int n = 0;
  743. while(match(TK("="))){
  744. EXPR_TUPLE();
  745. n += 1;
  746. }
  747. // stack size is n+1
  748. Expr_ val = ctx()->s_expr.popx();
  749. val->emit_(ctx());
  750. for(int j=1; j<n; j++) ctx()->emit_(OP_DUP_TOP, BC_NOARG, BC_KEEPLINE);
  751. for(int j=0; j<n; j++){
  752. auto e = ctx()->s_expr.popx();
  753. if(e->is_starred()) SyntaxError();
  754. bool ok = e->emit_store(ctx());
  755. if(!ok) SyntaxError();
  756. }
  757. } return true;
  758. default: return false;
  759. }
  760. }
  761. void Compiler::compile_stmt() {
  762. if(match(TK("class"))){
  763. compile_class();
  764. return;
  765. }
  766. advance();
  767. int kw_line = prev().line; // backup line number
  768. int curr_loop_block = ctx()->get_loop();
  769. switch(prev().type){
  770. case TK("break"):
  771. if (curr_loop_block < 0) SyntaxError("'break' outside loop");
  772. ctx()->emit_(OP_LOOP_BREAK, curr_loop_block, kw_line);
  773. consume_end_stmt();
  774. break;
  775. case TK("continue"):
  776. if (curr_loop_block < 0) SyntaxError("'continue' not properly in loop");
  777. ctx()->emit_(OP_LOOP_CONTINUE, curr_loop_block, kw_line);
  778. consume_end_stmt();
  779. break;
  780. case TK("yield"):
  781. if (contexts.size() <= 1) SyntaxError("'yield' outside function");
  782. EXPR_TUPLE(); ctx()->emit_expr();
  783. ctx()->emit_(OP_YIELD_VALUE, BC_NOARG, kw_line);
  784. consume_end_stmt();
  785. break;
  786. case TK("yield from"):
  787. if (contexts.size() <= 1) SyntaxError("'yield from' outside function");
  788. EXPR_TUPLE(); ctx()->emit_expr();
  789. ctx()->emit_(OP_GET_ITER, BC_NOARG, kw_line);
  790. ctx()->enter_block(CodeBlockType::FOR_LOOP);
  791. ctx()->emit_(OP_FOR_ITER_YIELD_VALUE, BC_NOARG, kw_line);
  792. ctx()->emit_(OP_LOOP_CONTINUE, ctx()->get_loop(), kw_line);
  793. ctx()->exit_block();
  794. consume_end_stmt();
  795. break;
  796. case TK("return"):
  797. if (contexts.size() <= 1) SyntaxError("'return' outside function");
  798. if(match_end_stmt()){
  799. ctx()->emit_(OP_RETURN_VALUE, 1, kw_line);
  800. }else{
  801. EXPR_TUPLE(); ctx()->emit_expr();
  802. consume_end_stmt();
  803. ctx()->emit_(OP_RETURN_VALUE, BC_NOARG, kw_line);
  804. }
  805. break;
  806. /*************************************************/
  807. case TK("if"): compile_if_stmt(); break;
  808. case TK("while"): compile_while_loop(); break;
  809. case TK("for"): compile_for_loop(); break;
  810. case TK("import"): compile_normal_import(); break;
  811. case TK("from"): compile_from_import(); break;
  812. case TK("def"): compile_function(); break;
  813. case TK("@"): compile_decorated(); break;
  814. case TK("try"): compile_try_except(); break;
  815. case TK("pass"): consume_end_stmt(); break;
  816. /*************************************************/
  817. case TK("++"):{
  818. consume(TK("@id"));
  819. StrName name(prev().sv());
  820. NameScope scope = name_scope();
  821. bool is_global = ctx()->global_names.count(name.sv());
  822. if(is_global) scope = NAME_GLOBAL;
  823. switch(scope){
  824. case NAME_LOCAL:
  825. ctx()->emit_(OP_INC_FAST, ctx()->add_varname(name), prev().line);
  826. break;
  827. case NAME_GLOBAL:
  828. ctx()->emit_(OP_INC_GLOBAL, name.index, prev().line);
  829. break;
  830. default: SyntaxError(); break;
  831. }
  832. consume_end_stmt();
  833. break;
  834. }
  835. case TK("--"):{
  836. consume(TK("@id"));
  837. StrName name(prev().sv());
  838. switch(name_scope()){
  839. case NAME_LOCAL:
  840. ctx()->emit_(OP_DEC_FAST, ctx()->add_varname(name), prev().line);
  841. break;
  842. case NAME_GLOBAL:
  843. ctx()->emit_(OP_DEC_GLOBAL, name.index, prev().line);
  844. break;
  845. default: SyntaxError(); break;
  846. }
  847. consume_end_stmt();
  848. break;
  849. }
  850. case TK("assert"):{
  851. EXPR(); // condition
  852. ctx()->emit_expr();
  853. int index = ctx()->emit_(OP_POP_JUMP_IF_TRUE, BC_NOARG, kw_line);
  854. int has_msg = 0;
  855. if(match(TK(","))){
  856. EXPR(); // message
  857. ctx()->emit_expr();
  858. has_msg = 1;
  859. }
  860. ctx()->emit_(OP_RAISE_ASSERT, has_msg, kw_line);
  861. ctx()->patch_jump(index);
  862. consume_end_stmt();
  863. break;
  864. }
  865. case TK("global"):
  866. do {
  867. consume(TK("@id"));
  868. ctx()->global_names.insert(prev().str());
  869. } while (match(TK(",")));
  870. consume_end_stmt();
  871. break;
  872. case TK("raise"): {
  873. EXPR(); ctx()->emit_expr();
  874. ctx()->emit_(OP_RAISE, BC_NOARG, kw_line);
  875. consume_end_stmt();
  876. } break;
  877. case TK("del"): {
  878. EXPR_TUPLE();
  879. Expr_ e = ctx()->s_expr.popx();
  880. bool ok = e->emit_del(ctx());
  881. if(!ok) SyntaxError();
  882. consume_end_stmt();
  883. } break;
  884. case TK("with"): {
  885. EXPR(); // [ <expr> ]
  886. ctx()->emit_expr();
  887. ctx()->enter_block(CodeBlockType::CONTEXT_MANAGER);
  888. Expr_ as_name;
  889. if(match(TK("as"))){
  890. consume(TK("@id"));
  891. as_name = make_expr<NameExpr>(prev().str(), name_scope());
  892. }
  893. ctx()->emit_(OP_WITH_ENTER, BC_NOARG, prev().line);
  894. // [ <expr> <expr>.__enter__() ]
  895. if(as_name != nullptr){
  896. bool ok = as_name->emit_store(ctx());
  897. if(!ok) SyntaxError();
  898. }else{
  899. ctx()->emit_(OP_POP_TOP, BC_NOARG, BC_KEEPLINE);
  900. }
  901. compile_block_body();
  902. ctx()->emit_(OP_WITH_EXIT, BC_NOARG, prev().line);
  903. ctx()->exit_block();
  904. } break;
  905. /*************************************************/
  906. case TK("=="): {
  907. consume(TK("@id"));
  908. if(mode()!=EXEC_MODE) SyntaxError("'label' is only available in EXEC_MODE");
  909. bool ok = ctx()->add_label(prev().str());
  910. consume(TK("=="));
  911. if(!ok) SyntaxError("label " + prev().str().escape() + " already exists");
  912. consume_end_stmt();
  913. } break;
  914. case TK("->"):
  915. consume(TK("@id"));
  916. if(mode()!=EXEC_MODE) SyntaxError("'goto' is only available in EXEC_MODE");
  917. ctx()->emit_(OP_GOTO, StrName(prev().sv()).index, prev().line);
  918. consume_end_stmt();
  919. break;
  920. /*************************************************/
  921. // handle dangling expression or assignment
  922. default: {
  923. advance(-1); // do revert since we have pre-called advance() at the beginning
  924. EXPR_TUPLE();
  925. bool is_typed_name = false; // e.g. x: int
  926. // eat variable's type hint if it is a single name
  927. if(ctx()->s_expr.top()->is_name()){
  928. if(match(TK(":"))){
  929. consume_type_hints();
  930. is_typed_name = true;
  931. if(ctx()->is_compiling_class){
  932. NameExpr* ne = static_cast<NameExpr*>(ctx()->s_expr.top().get());
  933. ctx()->emit_(OP_ADD_CLASS_ANNOTATION, ne->name.index, BC_KEEPLINE);
  934. }
  935. }
  936. }
  937. if(!try_compile_assignment()){
  938. if(!ctx()->s_expr.empty() && ctx()->s_expr.top()->is_starred()){
  939. SyntaxError();
  940. }
  941. if(!is_typed_name){
  942. ctx()->emit_expr();
  943. if((mode()==CELL_MODE || mode()==REPL_MODE) && name_scope()==NAME_GLOBAL){
  944. ctx()->emit_(OP_PRINT_EXPR, BC_NOARG, BC_KEEPLINE);
  945. }else{
  946. ctx()->emit_(OP_POP_TOP, BC_NOARG, BC_KEEPLINE);
  947. }
  948. }else{
  949. PK_ASSERT(ctx()->s_expr.size() == 1)
  950. ctx()->s_expr.pop();
  951. }
  952. }
  953. consume_end_stmt();
  954. }
  955. }
  956. }
  957. void Compiler::consume_type_hints(){
  958. EXPR();
  959. ctx()->s_expr.pop();
  960. }
  961. void Compiler::_add_decorators(const Expr_vector& decorators){
  962. // [obj]
  963. for(auto it=decorators.rbegin(); it!=decorators.rend(); ++it){
  964. (*it)->emit_(ctx()); // [obj, f]
  965. ctx()->emit_(OP_ROT_TWO, BC_NOARG, (*it)->line); // [f, obj]
  966. ctx()->emit_(OP_LOAD_NULL, BC_NOARG, BC_KEEPLINE); // [f, obj, NULL]
  967. ctx()->emit_(OP_ROT_TWO, BC_NOARG, BC_KEEPLINE); // [obj, NULL, f]
  968. ctx()->emit_(OP_CALL, 1, (*it)->line); // [obj]
  969. }
  970. }
  971. void Compiler::compile_class(const Expr_vector& decorators){
  972. consume(TK("@id"));
  973. int namei = StrName(prev().sv()).index;
  974. Expr_ base = nullptr;
  975. if(match(TK("("))){
  976. if(is_expression()){
  977. EXPR();
  978. base = ctx()->s_expr.popx();
  979. }
  980. consume(TK(")"));
  981. }
  982. if(base == nullptr){
  983. ctx()->emit_(OP_LOAD_NONE, BC_NOARG, prev().line);
  984. }else {
  985. base->emit_(ctx());
  986. }
  987. ctx()->emit_(OP_BEGIN_CLASS, namei, BC_KEEPLINE);
  988. for(auto& c: this->contexts.container()){
  989. if(c.is_compiling_class){
  990. SyntaxError("nested class is not allowed");
  991. }
  992. }
  993. ctx()->is_compiling_class = true;
  994. compile_block_body();
  995. ctx()->is_compiling_class = false;
  996. if(!decorators.empty()){
  997. ctx()->emit_(OP_BEGIN_CLASS_DECORATION, BC_NOARG, BC_KEEPLINE);
  998. _add_decorators(decorators);
  999. ctx()->emit_(OP_END_CLASS_DECORATION, BC_NOARG, BC_KEEPLINE);
  1000. }
  1001. ctx()->emit_(OP_END_CLASS, namei, BC_KEEPLINE);
  1002. }
  1003. void Compiler::_compile_f_args(FuncDecl_ decl, bool enable_type_hints){
  1004. int state = 0; // 0 for args, 1 for *args, 2 for k=v, 3 for **kwargs
  1005. do {
  1006. if(state > 3) SyntaxError();
  1007. if(state == 3) SyntaxError("**kwargs should be the last argument");
  1008. match_newlines();
  1009. if(match(TK("*"))){
  1010. if(state < 1) state = 1;
  1011. else SyntaxError("*args should be placed before **kwargs");
  1012. }
  1013. else if(match(TK("**"))){
  1014. state = 3;
  1015. }
  1016. consume(TK("@id"));
  1017. StrName name = prev().str();
  1018. // check duplicate argument name
  1019. for(int j: decl->args){
  1020. if(decl->code->varnames[j] == name) {
  1021. SyntaxError("duplicate argument name");
  1022. }
  1023. }
  1024. for(auto& kv: decl->kwargs){
  1025. if(decl->code->varnames[kv.index] == name){
  1026. SyntaxError("duplicate argument name");
  1027. }
  1028. }
  1029. if(decl->starred_arg!=-1 && decl->code->varnames[decl->starred_arg] == name){
  1030. SyntaxError("duplicate argument name");
  1031. }
  1032. if(decl->starred_kwarg!=-1 && decl->code->varnames[decl->starred_kwarg] == name){
  1033. SyntaxError("duplicate argument name");
  1034. }
  1035. // eat type hints
  1036. if(enable_type_hints && match(TK(":"))) consume_type_hints();
  1037. if(state == 0 && curr().type == TK("=")) state = 2;
  1038. int index = ctx()->add_varname(name);
  1039. switch (state)
  1040. {
  1041. case 0:
  1042. decl->args.push_back(index);
  1043. break;
  1044. case 1:
  1045. decl->starred_arg = index;
  1046. state+=1;
  1047. break;
  1048. case 2: {
  1049. consume(TK("="));
  1050. PyObject* value = read_literal();
  1051. if(value == nullptr){
  1052. SyntaxError(Str("default argument must be a literal"));
  1053. }
  1054. decl->add_kwarg(index, name, value);
  1055. } break;
  1056. case 3:
  1057. decl->starred_kwarg = index;
  1058. state+=1;
  1059. break;
  1060. }
  1061. } while (match(TK(",")));
  1062. }
  1063. void Compiler::compile_function(const Expr_vector& decorators){
  1064. consume(TK("@id"));
  1065. Str decl_name = prev().str();
  1066. FuncDecl_ decl = push_f_context(decl_name);
  1067. consume(TK("("));
  1068. if (!match(TK(")"))) {
  1069. _compile_f_args(decl, true);
  1070. consume(TK(")"));
  1071. }
  1072. if(match(TK("->"))) consume_type_hints();
  1073. compile_block_body();
  1074. pop_context();
  1075. decl->docstring = nullptr;
  1076. if(decl->code->codes.size()>=2 && decl->code->codes[0].op == OP_LOAD_CONST && decl->code->codes[1].op == OP_POP_TOP){
  1077. PyObject* c = decl->code->consts[decl->code->codes[0].arg];
  1078. if(is_type(c, vm->tp_str)){
  1079. decl->code->codes[0].op = OP_NO_OP;
  1080. decl->code->codes[1].op = OP_NO_OP;
  1081. decl->docstring = PK_OBJ_GET(Str, c).c_str();
  1082. }
  1083. }
  1084. ctx()->emit_(OP_LOAD_FUNCTION, ctx()->add_func_decl(decl), prev().line);
  1085. _add_decorators(decorators);
  1086. if(!ctx()->is_compiling_class){
  1087. auto e = make_expr<NameExpr>(decl_name, name_scope());
  1088. e->emit_store(ctx());
  1089. }else{
  1090. int index = StrName(decl_name).index;
  1091. ctx()->emit_(OP_STORE_CLASS_ATTR, index, prev().line);
  1092. }
  1093. }
  1094. PyObject* Compiler::to_object(const TokenValue& value){
  1095. PyObject* obj = nullptr;
  1096. if(std::holds_alternative<i64>(value)){
  1097. obj = VAR(std::get<i64>(value));
  1098. }
  1099. if(std::holds_alternative<f64>(value)){
  1100. obj = VAR(std::get<f64>(value));
  1101. }
  1102. if(std::holds_alternative<Str>(value)){
  1103. obj = VAR(std::get<Str>(value));
  1104. }
  1105. PK_ASSERT(obj != nullptr)
  1106. return obj;
  1107. }
  1108. PyObject* Compiler::read_literal(){
  1109. advance();
  1110. switch(prev().type){
  1111. case TK("-"): {
  1112. consume(TK("@num"));
  1113. PyObject* val = to_object(prev().value);
  1114. return vm->py_negate(val);
  1115. }
  1116. case TK("@num"): return to_object(prev().value);
  1117. case TK("@str"): return to_object(prev().value);
  1118. case TK("True"): return VAR(true);
  1119. case TK("False"): return VAR(false);
  1120. case TK("None"): return vm->None;
  1121. case TK("..."): return vm->Ellipsis;
  1122. case TK("("): {
  1123. List cpnts;
  1124. while(true) {
  1125. cpnts.push_back(read_literal());
  1126. if(curr().type == TK(")")) break;
  1127. consume(TK(","));
  1128. if(curr().type == TK(")")) break;
  1129. }
  1130. consume(TK(")"));
  1131. return VAR(Tuple(std::move(cpnts)));
  1132. }
  1133. default: break;
  1134. }
  1135. return nullptr;
  1136. }
  1137. Compiler::Compiler(VM* vm, std::string_view source, const Str& filename, CompileMode mode, bool unknown_global_scope)
  1138. :lexer(vm, std::make_shared<SourceData>(source, filename, mode)){
  1139. this->vm = vm;
  1140. this->unknown_global_scope = unknown_global_scope;
  1141. init_pratt_rules();
  1142. }
  1143. Str Compiler::precompile(){
  1144. auto tokens = lexer.run();
  1145. SStream ss;
  1146. ss << "pkpy:" PK_VERSION << '\n'; // L1: version string
  1147. ss << (int)mode() << '\n'; // L2: mode
  1148. std::map<std::string_view, int> token_indices;
  1149. for(auto token: tokens){
  1150. if(is_raw_string_used(token.type)){
  1151. auto it = token_indices.find(token.sv());
  1152. if(it == token_indices.end()){
  1153. token_indices[token.sv()] = 0;
  1154. // assert no '\n' in token.sv()
  1155. for(char c: token.sv()) if(c=='\n') PK_FATAL_ERROR();
  1156. }
  1157. }
  1158. }
  1159. ss << "=" << (int)token_indices.size() << '\n'; // L3: raw string count
  1160. int index = 0;
  1161. for(auto& kv: token_indices){
  1162. ss << kv.first << '\n'; // L4: raw strings
  1163. kv.second = index++;
  1164. }
  1165. ss << "=" << (int)tokens.size() << '\n'; // L5: token count
  1166. for(int i=0; i<tokens.size(); i++){
  1167. const Token& token = tokens[i];
  1168. ss << (int)token.type << ',';
  1169. if(is_raw_string_used(token.type)){
  1170. ss << token_indices[token.sv()] << ',';
  1171. }
  1172. if(i>0 && tokens[i-1].line == token.line) ss << ',';
  1173. else ss << token.line << ',';
  1174. if(i>0 && tokens[i-1].brackets_level == token.brackets_level) ss << ',';
  1175. else ss << token.brackets_level << ',';
  1176. // visit token value
  1177. std::visit([&ss](auto&& arg){
  1178. using T = std::decay_t<decltype(arg)>;
  1179. if constexpr(std::is_same_v<T, i64>){
  1180. ss << 'I' << arg;
  1181. }else if constexpr(std::is_same_v<T, f64>){
  1182. ss << 'F' << arg;
  1183. }else if constexpr(std::is_same_v<T, Str>){
  1184. ss << 'S';
  1185. for(char c: arg) ss.write_hex((unsigned char)c);
  1186. }
  1187. ss << '\n';
  1188. }, token.value);
  1189. }
  1190. return ss.str();
  1191. }
  1192. void Compiler::from_precompiled(const char* source){
  1193. TokenDeserializer deserializer(source);
  1194. deserializer.curr += 5; // skip "pkpy:"
  1195. std::string_view version = deserializer.read_string('\n');
  1196. if(version != PK_VERSION){
  1197. Str error = _S("precompiled version mismatch: ", version, "!=" PK_VERSION);
  1198. throw std::runtime_error(error.c_str());
  1199. }
  1200. if(deserializer.read_uint('\n') != (i64)mode()){
  1201. throw std::runtime_error("precompiled mode mismatch");
  1202. }
  1203. int count = deserializer.read_count();
  1204. std::vector<Str>& precompiled_tokens = lexer.src->_precompiled_tokens;
  1205. for(int i=0; i<count; i++){
  1206. precompiled_tokens.push_back(deserializer.read_string('\n'));
  1207. }
  1208. count = deserializer.read_count();
  1209. for(int i=0; i<count; i++){
  1210. Token t;
  1211. t.type = (unsigned char)deserializer.read_uint(',');
  1212. if(is_raw_string_used(t.type)){
  1213. i64 index = deserializer.read_uint(',');
  1214. t.start = precompiled_tokens[index].c_str();
  1215. t.length = precompiled_tokens[index].size;
  1216. }else{
  1217. t.start = nullptr;
  1218. t.length = 0;
  1219. }
  1220. if(deserializer.match_char(',')){
  1221. t.line = tokens.back().line;
  1222. }else{
  1223. t.line = (int)deserializer.read_uint(',');
  1224. }
  1225. if(deserializer.match_char(',')){
  1226. t.brackets_level = tokens.back().brackets_level;
  1227. }else{
  1228. t.brackets_level = (int)deserializer.read_uint(',');
  1229. }
  1230. char type = deserializer.read_char();
  1231. switch(type){
  1232. case 'I': t.value = deserializer.read_uint('\n'); break;
  1233. case 'F': t.value = deserializer.read_float('\n'); break;
  1234. case 'S': t.value = deserializer.read_string_from_hex('\n'); break;
  1235. default: t.value = {}; break;
  1236. }
  1237. tokens.push_back(t);
  1238. }
  1239. }
  1240. CodeObject_ Compiler::compile(){
  1241. PK_ASSERT(i == 0) // make sure it is the first time to compile
  1242. if(lexer.src->is_precompiled){
  1243. from_precompiled(lexer.src->source.c_str());
  1244. }else{
  1245. this->tokens = lexer.run();
  1246. }
  1247. CodeObject_ code = push_global_context();
  1248. advance(); // skip @sof, so prev() is always valid
  1249. match_newlines(); // skip possible leading '\n'
  1250. if(mode()==EVAL_MODE) {
  1251. EXPR_TUPLE(); ctx()->emit_expr();
  1252. consume(TK("@eof"));
  1253. ctx()->emit_(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  1254. pop_context();
  1255. return code;
  1256. }else if(mode()==JSON_MODE){
  1257. EXPR();
  1258. Expr_ e = ctx()->s_expr.popx();
  1259. if(!e->is_json_object()) SyntaxError("expect a JSON object, literal or array");
  1260. consume(TK("@eof"));
  1261. e->emit_(ctx());
  1262. ctx()->emit_(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  1263. pop_context();
  1264. return code;
  1265. }
  1266. while (!match(TK("@eof"))) {
  1267. compile_stmt();
  1268. match_newlines();
  1269. }
  1270. pop_context();
  1271. return code;
  1272. }
  1273. // TODO: refactor this
  1274. void Lexer::throw_err(StrName type, Str msg, int lineno, const char* cursor){
  1275. PyObject* e_obj = vm->call(vm->builtins->attr(type), VAR(msg));
  1276. Exception& e = PK_OBJ_GET(Exception, e_obj);
  1277. e.st_push(src, lineno, cursor, "");
  1278. throw e;
  1279. }
  1280. std::string_view TokenDeserializer::read_string(char c){
  1281. const char* start = curr;
  1282. while(*curr != c) curr++;
  1283. std::string_view retval(start, curr-start);
  1284. curr++; // skip the delimiter
  1285. return retval;
  1286. }
  1287. Str TokenDeserializer::read_string_from_hex(char c){
  1288. std::string_view s = read_string(c);
  1289. char* buffer = (char*)pool64_alloc(s.size()/2 + 1);
  1290. for(int i=0; i<s.size(); i+=2){
  1291. char c = 0;
  1292. if(s[i]>='0' && s[i]<='9') c += s[i]-'0';
  1293. else if(s[i]>='a' && s[i]<='f') c += s[i]-'a'+10;
  1294. else PK_FATAL_ERROR();
  1295. c <<= 4;
  1296. if(s[i+1]>='0' && s[i+1]<='9') c += s[i+1]-'0';
  1297. else if(s[i+1]>='a' && s[i+1]<='f') c += s[i+1]-'a'+10;
  1298. else PK_FATAL_ERROR();
  1299. buffer[i/2] = c;
  1300. }
  1301. buffer[s.size()/2] = 0;
  1302. return std::pair<char*, int>(buffer, s.size()/2);
  1303. }
  1304. int TokenDeserializer::read_count(){
  1305. PK_ASSERT(*curr == '=')
  1306. curr++;
  1307. return read_uint('\n');
  1308. }
  1309. i64 TokenDeserializer::read_uint(char c){
  1310. i64 out = 0;
  1311. while(*curr != c){
  1312. out = out*10 + (*curr-'0');
  1313. curr++;
  1314. }
  1315. curr++; // skip the delimiter
  1316. return out;
  1317. }
  1318. f64 TokenDeserializer::read_float(char c){
  1319. std::string_view sv = read_string(c);
  1320. return std::stod(std::string(sv));
  1321. }
  1322. } // namespace pkpy