compiler.cpp 55 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431
  1. #include "pocketpy/compiler.h"
  2. namespace pkpy{
  // Pratt-parser rule table, indexed by token type; filled in by Compiler::init_pratt_rules().
  3. PrattRule Compiler::rules[kTokenCount];
  4. NameScope Compiler::name_scope() const {
  5. auto s = contexts.size()>1 ? NAME_LOCAL : NAME_GLOBAL;
  6. if(unknown_global_scope && s == NAME_GLOBAL) s = NAME_GLOBAL_UNKNOWN;
  7. return s;
  8. }
  9. CodeObject_ Compiler::push_global_context(){
  10. CodeObject_ co = std::make_shared<CodeObject>(lexer.src, lexer.src->filename);
  11. co->start_line = i==0 ? 1 : prev().line;
  12. contexts.push(CodeEmitContext(vm, co, contexts.size()));
  13. return co;
  14. }
  15. FuncDecl_ Compiler::push_f_context(Str name){
  16. FuncDecl_ decl = std::make_shared<FuncDecl>();
  17. decl->code = std::make_shared<CodeObject>(lexer.src, name);
  18. decl->code->start_line = i==0 ? 1 : prev().line;
  19. decl->nested = name_scope() == NAME_LOCAL;
  20. contexts.push(CodeEmitContext(vm, decl->code, contexts.size()));
  21. contexts.top().func = decl;
  22. return decl;
  23. }
  24. void Compiler::pop_context(){
  25. if(!ctx()->s_expr.empty()){
  26. throw std::runtime_error("!ctx()->s_expr.empty()");
  27. }
  28. // add a `return None` in the end as a guard
  29. // previously, we only do this if the last opcode is not a return
  30. // however, this is buggy...since there may be a jump to the end (out of bound) even if the last opcode is a return
  31. ctx()->emit_(OP_RETURN_VALUE, 1, BC_KEEPLINE, true);
  32. // find the last valid token
  33. int j = i-1;
  34. while(tokens[j].type == TK("@eol") || tokens[j].type == TK("@dedent") || tokens[j].type == TK("@eof")) j--;
  35. ctx()->co->end_line = tokens[j].line;
  36. // some check here
  37. auto& codes = ctx()->co->codes;
  38. if(ctx()->co->nlocals > PK_MAX_CO_VARNAMES){
  39. SyntaxError("maximum number of local variables exceeded");
  40. }
  41. if(ctx()->co->consts.size() > 65530){
  42. SyntaxError("maximum number of constants exceeded");
  43. }
  44. // pre-compute LOOP_BREAK and LOOP_CONTINUE
  45. for(int i=0; i<codes.size(); i++){
  46. Bytecode& bc = codes[i];
  47. if(bc.op == OP_LOOP_CONTINUE){
  48. bc.set_signed_arg(ctx()->co->blocks[bc.arg].start - i);
  49. }else if(bc.op == OP_LOOP_BREAK){
  50. bc.set_signed_arg(ctx()->co->blocks[bc.arg].get_break_end() - i);
  51. }
  52. }
  53. // pre-compute func->is_simple
  54. FuncDecl_ func = contexts.top().func;
  55. if(func){
  56. // check generator
  57. for(Bytecode bc: func->code->codes){
  58. if(bc.op == OP_YIELD_VALUE || bc.op == OP_FOR_ITER_YIELD_VALUE){
  59. func->type = FuncType::GENERATOR;
  60. for(Bytecode bc: func->code->codes){
  61. if(bc.op == OP_RETURN_VALUE && bc.arg == BC_NOARG){
  62. SyntaxError("'return' with argument inside generator function");
  63. }
  64. }
  65. break;
  66. }
  67. }
  68. if(func->type == FuncType::UNSET){
  69. bool is_simple = true;
  70. if(func->kwargs.size() > 0) is_simple = false;
  71. if(func->starred_arg >= 0) is_simple = false;
  72. if(func->starred_kwarg >= 0) is_simple = false;
  73. if(is_simple){
  74. func->type = FuncType::SIMPLE;
  75. bool is_empty = false;
  76. if(func->code->codes.size() == 1){
  77. Bytecode bc = func->code->codes[0];
  78. if(bc.op == OP_RETURN_VALUE && bc.arg == 1){
  79. is_empty = true;
  80. }
  81. }
  82. if(is_empty) func->type = FuncType::EMPTY;
  83. }
  84. else func->type = FuncType::NORMAL;
  85. }
  86. PK_ASSERT(func->type != FuncType::UNSET);
  87. }
  88. contexts.pop();
  89. }
  90. void Compiler::init_pratt_rules(){
  91. PK_LOCAL_STATIC bool initialized = false;
  92. if(initialized) return;
  93. initialized = true;
  94. // http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
  95. #define PK_METHOD(name) &Compiler::name
  96. #define PK_NO_INFIX nullptr, PREC_LOWEST
  97. for(TokenIndex i=0; i<kTokenCount; i++) rules[i] = { nullptr, PK_NO_INFIX };
  98. rules[TK(".")] = { nullptr, PK_METHOD(exprAttrib), PREC_PRIMARY };
  99. rules[TK("(")] = { PK_METHOD(exprGroup), PK_METHOD(exprCall), PREC_PRIMARY };
  100. rules[TK("[")] = { PK_METHOD(exprList), PK_METHOD(exprSubscr), PREC_PRIMARY };
  101. rules[TK("{")] = { PK_METHOD(exprMap), PK_NO_INFIX };
  102. rules[TK("%")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_FACTOR };
  103. rules[TK("+")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_TERM };
  104. rules[TK("-")] = { PK_METHOD(exprUnaryOp), PK_METHOD(exprBinaryOp), PREC_TERM };
  105. rules[TK("*")] = { PK_METHOD(exprUnaryOp), PK_METHOD(exprBinaryOp), PREC_FACTOR };
  106. rules[TK("~")] = { PK_METHOD(exprUnaryOp), nullptr, PREC_UNARY };
  107. rules[TK("/")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_FACTOR };
  108. rules[TK("//")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_FACTOR };
  109. rules[TK("**")] = { PK_METHOD(exprUnaryOp), PK_METHOD(exprBinaryOp), PREC_EXPONENT };
  110. rules[TK(">")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
  111. rules[TK("<")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
  112. rules[TK("==")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
  113. rules[TK("!=")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
  114. rules[TK(">=")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
  115. rules[TK("<=")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
  116. rules[TK("in")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
  117. rules[TK("is")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
  118. rules[TK("<<")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  119. rules[TK(">>")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  120. rules[TK("&")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_AND };
  121. rules[TK("|")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_OR };
  122. rules[TK("^")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_XOR };
  123. rules[TK("@")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_FACTOR };
  124. rules[TK("if")] = { nullptr, PK_METHOD(exprTernary), PREC_TERNARY };
  125. rules[TK("not in")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
  126. rules[TK("is not")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
  127. rules[TK("and") ] = { nullptr, PK_METHOD(exprAnd), PREC_LOGICAL_AND };
  128. rules[TK("or")] = { nullptr, PK_METHOD(exprOr), PREC_LOGICAL_OR };
  129. rules[TK("not")] = { PK_METHOD(exprNot), nullptr, PREC_LOGICAL_NOT };
  130. rules[TK("True")] = { PK_METHOD(exprLiteral0), PK_NO_INFIX };
  131. rules[TK("False")] = { PK_METHOD(exprLiteral0), PK_NO_INFIX };
  132. rules[TK("None")] = { PK_METHOD(exprLiteral0), PK_NO_INFIX };
  133. rules[TK("...")] = { PK_METHOD(exprLiteral0), PK_NO_INFIX };
  134. rules[TK("lambda")] = { PK_METHOD(exprLambda), PK_NO_INFIX };
  135. rules[TK("@id")] = { PK_METHOD(exprName), PK_NO_INFIX };
  136. rules[TK("@num")] = { PK_METHOD(exprLiteral), PK_NO_INFIX };
  137. rules[TK("@str")] = { PK_METHOD(exprLiteral), PK_NO_INFIX };
  138. rules[TK("@fstr")] = { PK_METHOD(exprFString), PK_NO_INFIX };
  139. rules[TK("@long")] = { PK_METHOD(exprLong), PK_NO_INFIX };
  140. rules[TK("@imag")] = { PK_METHOD(exprImag), PK_NO_INFIX };
  141. rules[TK("@bytes")] = { PK_METHOD(exprBytes), PK_NO_INFIX };
  142. rules[TK(":")] = { PK_METHOD(exprSlice0), PK_METHOD(exprSlice1), PREC_PRIMARY };
  143. #undef PK_METHOD
  144. #undef PK_NO_INFIX
  145. }
  146. bool Compiler::match(TokenIndex expected) {
  147. if (curr().type != expected) return false;
  148. advance();
  149. return true;
  150. }
  151. void Compiler::consume(TokenIndex expected) {
  152. if (!match(expected)){
  153. SyntaxError(
  154. _S("expected '", TK_STR(expected), "', got '", TK_STR(curr().type), "'")
  155. );
  156. }
  157. }
  158. bool Compiler::match_newlines_repl(){
  159. return match_newlines(mode()==REPL_MODE);
  160. }
  161. bool Compiler::match_newlines(bool repl_throw) {
  162. bool consumed = false;
  163. if (curr().type == TK("@eol")) {
  164. while (curr().type == TK("@eol")) advance();
  165. consumed = true;
  166. }
  167. if (repl_throw && curr().type == TK("@eof")){
  168. throw NeedMoreLines(ctx()->is_compiling_class);
  169. }
  170. return consumed;
  171. }
  172. bool Compiler::match_end_stmt() {
  173. if (match(TK(";"))) { match_newlines(); return true; }
  174. if (match_newlines() || curr().type == TK("@eof")) return true;
  175. if (curr().type == TK("@dedent")) return true;
  176. return false;
  177. }
  178. void Compiler::consume_end_stmt() {
  179. if (!match_end_stmt()) SyntaxError("expected statement end");
  180. }
  181. void Compiler::EXPR() {
  182. parse_expression(PREC_LOWEST+1);
  183. }
  184. void Compiler::EXPR_TUPLE(bool allow_slice) {
  185. parse_expression(PREC_LOWEST+1, allow_slice);
  186. if(!match(TK(","))) return;
  187. // tuple expression
  188. Expr_vector items;
  189. items.push_back(ctx()->s_expr.popx());
  190. do {
  191. if(curr().brackets_level) match_newlines_repl();
  192. if(!is_expression(allow_slice)) break;
  193. parse_expression(PREC_LOWEST+1, allow_slice);
  194. items.push_back(ctx()->s_expr.popx());
  195. if(curr().brackets_level) match_newlines_repl();
  196. } while(match(TK(",")));
  197. ctx()->s_expr.push(make_expr<TupleExpr>(std::move(items)));
  198. }
  199. // special case for `for loop` and `comp`
  200. Expr_ Compiler::EXPR_VARS(){
  201. Expr_vector items;
  202. do {
  203. consume(TK("@id"));
  204. items.push_back(make_expr<NameExpr>(prev().str(), name_scope()));
  205. } while(match(TK(",")));
  206. if(items.size()==1) return std::move(items[0]);
  207. return make_expr<TupleExpr>(std::move(items));
  208. }
  209. void Compiler::exprLiteral(){
  210. ctx()->s_expr.push(make_expr<LiteralExpr>(prev().value));
  211. }
  212. void Compiler::exprLong(){
  213. ctx()->s_expr.push(make_expr<LongExpr>(prev().str()));
  214. }
  215. void Compiler::exprImag(){
  216. ctx()->s_expr.push(make_expr<ImagExpr>(std::get<f64>(prev().value)));
  217. }
  218. void Compiler::exprBytes(){
  219. ctx()->s_expr.push(make_expr<BytesExpr>(std::get<Str>(prev().value)));
  220. }
  221. void Compiler::exprFString(){
  222. ctx()->s_expr.push(make_expr<FStringExpr>(std::get<Str>(prev().value)));
  223. }
  224. void Compiler::exprLambda(){
  225. FuncDecl_ decl = push_f_context("<lambda>");
  226. auto e = make_expr<LambdaExpr>(decl);
  227. if(!match(TK(":"))){
  228. _compile_f_args(e->decl, false);
  229. consume(TK(":"));
  230. }
  231. // https://github.com/pocketpy/pocketpy/issues/37
  232. parse_expression(PREC_LAMBDA + 1);
  233. ctx()->emit_expr();
  234. ctx()->emit_(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  235. pop_context();
  236. ctx()->s_expr.push(std::move(e));
  237. }
  238. void Compiler::exprOr(){
  239. auto e = make_expr<OrExpr>();
  240. e->lhs = ctx()->s_expr.popx();
  241. parse_expression(PREC_LOGICAL_OR + 1);
  242. e->rhs = ctx()->s_expr.popx();
  243. ctx()->s_expr.push(std::move(e));
  244. }
  245. void Compiler::exprAnd(){
  246. auto e = make_expr<AndExpr>();
  247. e->lhs = ctx()->s_expr.popx();
  248. parse_expression(PREC_LOGICAL_AND + 1);
  249. e->rhs = ctx()->s_expr.popx();
  250. ctx()->s_expr.push(std::move(e));
  251. }
  252. void Compiler::exprTernary(){
  253. auto e = make_expr<TernaryExpr>();
  254. e->true_expr = ctx()->s_expr.popx();
  255. // cond
  256. parse_expression(PREC_TERNARY + 1);
  257. e->cond = ctx()->s_expr.popx();
  258. consume(TK("else"));
  259. // if false
  260. parse_expression(PREC_TERNARY + 1);
  261. e->false_expr = ctx()->s_expr.popx();
  262. ctx()->s_expr.push(std::move(e));
  263. }
  264. void Compiler::exprBinaryOp(){
  265. auto e = make_expr<BinaryExpr>();
  266. e->op = prev().type;
  267. e->lhs = ctx()->s_expr.popx();
  268. parse_expression(rules[e->op].precedence + 1);
  269. e->rhs = ctx()->s_expr.popx();
  270. ctx()->s_expr.push(std::move(e));
  271. }
  272. void Compiler::exprNot() {
  273. parse_expression(PREC_LOGICAL_NOT + 1);
  274. ctx()->s_expr.push(make_expr<NotExpr>(ctx()->s_expr.popx()));
  275. }
  276. void Compiler::exprUnaryOp(){
  277. TokenIndex op = prev().type;
  278. parse_expression(PREC_UNARY + 1);
  279. switch(op){
  280. case TK("-"):
  281. ctx()->s_expr.push(make_expr<NegatedExpr>(ctx()->s_expr.popx()));
  282. break;
  283. case TK("~"):
  284. ctx()->s_expr.push(make_expr<InvertExpr>(ctx()->s_expr.popx()));
  285. break;
  286. case TK("*"):
  287. ctx()->s_expr.push(make_expr<StarredExpr>(1, ctx()->s_expr.popx()));
  288. break;
  289. case TK("**"):
  290. ctx()->s_expr.push(make_expr<StarredExpr>(2, ctx()->s_expr.popx()));
  291. break;
  292. default: PK_FATAL_ERROR();
  293. }
  294. }
  295. void Compiler::exprGroup(){
  296. match_newlines_repl();
  297. EXPR_TUPLE(); // () is just for change precedence
  298. match_newlines_repl();
  299. consume(TK(")"));
  300. if(ctx()->s_expr.top()->is_tuple()) return;
  301. Expr_ g = make_expr<GroupedExpr>(ctx()->s_expr.popx());
  302. ctx()->s_expr.push(std::move(g));
  303. }
  304. void Compiler::consume_comp(unique_ptr_128<CompExpr> ce, Expr_ expr){
  305. ce->expr = std::move(expr);
  306. ce->vars = EXPR_VARS();
  307. consume(TK("in"));
  308. parse_expression(PREC_TERNARY + 1);
  309. ce->iter = ctx()->s_expr.popx();
  310. match_newlines_repl();
  311. if(match(TK("if"))){
  312. parse_expression(PREC_TERNARY + 1);
  313. ce->cond = ctx()->s_expr.popx();
  314. }
  315. ctx()->s_expr.push(std::move(ce));
  316. match_newlines_repl();
  317. }
  318. void Compiler::exprList() {
  319. int line = prev().line;
  320. Expr_vector items;
  321. do {
  322. match_newlines_repl();
  323. if (curr().type == TK("]")) break;
  324. EXPR();
  325. items.push_back(ctx()->s_expr.popx());
  326. match_newlines_repl();
  327. if(items.size()==1 && match(TK("for"))){
  328. consume_comp(make_expr<ListCompExpr>(), std::move(items[0]));
  329. consume(TK("]"));
  330. return;
  331. }
  332. match_newlines_repl();
  333. } while (match(TK(",")));
  334. consume(TK("]"));
  335. auto e = make_expr<ListExpr>(std::move(items));
  336. e->line = line; // override line
  337. ctx()->s_expr.push(std::move(e));
  338. }
  339. void Compiler::exprMap() {
  340. bool parsing_dict = false; // {...} may be dict or set
  341. Expr_vector items;
  342. do {
  343. match_newlines_repl();
  344. if (curr().type == TK("}")) break;
  345. EXPR();
  346. int star_level = ctx()->s_expr.top()->star_level();
  347. if(star_level==2 || curr().type == TK(":")){
  348. parsing_dict = true;
  349. }
  350. if(parsing_dict){
  351. auto dict_item = make_expr<DictItemExpr>();
  352. if(star_level == 2){
  353. dict_item->key = nullptr;
  354. dict_item->value = ctx()->s_expr.popx();
  355. }else{
  356. consume(TK(":"));
  357. EXPR();
  358. dict_item->key = ctx()->s_expr.popx();
  359. dict_item->value = ctx()->s_expr.popx();
  360. }
  361. items.push_back(std::move(dict_item));
  362. }else{
  363. items.push_back(ctx()->s_expr.popx());
  364. }
  365. match_newlines_repl();
  366. if(items.size()==1 && match(TK("for"))){
  367. if(parsing_dict) consume_comp(make_expr<DictCompExpr>(), std::move(items[0]));
  368. else consume_comp(make_expr<SetCompExpr>(), std::move(items[0]));
  369. consume(TK("}"));
  370. return;
  371. }
  372. match_newlines_repl();
  373. } while (match(TK(",")));
  374. consume(TK("}"));
  375. if(items.size()==0 || parsing_dict){
  376. auto e = make_expr<DictExpr>(std::move(items));
  377. ctx()->s_expr.push(std::move(e));
  378. }else{
  379. auto e = make_expr<SetExpr>(std::move(items));
  380. ctx()->s_expr.push(std::move(e));
  381. }
  382. }
  383. void Compiler::exprCall() {
  384. auto e = make_expr<CallExpr>();
  385. e->callable = ctx()->s_expr.popx();
  386. do {
  387. match_newlines_repl();
  388. if (curr().type==TK(")")) break;
  389. if(curr().type==TK("@id") && next().type==TK("=")) {
  390. consume(TK("@id"));
  391. Str key = prev().str();
  392. consume(TK("="));
  393. EXPR();
  394. e->kwargs.push_back({key, ctx()->s_expr.popx()});
  395. } else{
  396. EXPR();
  397. if(ctx()->s_expr.top()->star_level() == 2){
  398. // **kwargs
  399. e->kwargs.push_back({"**", ctx()->s_expr.popx()});
  400. }else{
  401. // positional argument
  402. if(!e->kwargs.empty()) SyntaxError("positional argument follows keyword argument");
  403. e->args.push_back(ctx()->s_expr.popx());
  404. }
  405. }
  406. match_newlines_repl();
  407. } while (match(TK(",")));
  408. consume(TK(")"));
  409. if(e->args.size() > 32767) SyntaxError("too many positional arguments");
  410. if(e->kwargs.size() > 32767) SyntaxError("too many keyword arguments");
  411. ctx()->s_expr.push(std::move(e));
  412. }
  413. void Compiler::exprName(){
  414. Str name = prev().str();
  415. NameScope scope = name_scope();
  416. if(ctx()->global_names.count(name)){
  417. scope = NAME_GLOBAL;
  418. }
  419. ctx()->s_expr.push(make_expr<NameExpr>(name, scope));
  420. }
  421. void Compiler::exprAttrib() {
  422. consume(TK("@id"));
  423. ctx()->s_expr.push(
  424. make_expr<AttribExpr>(ctx()->s_expr.popx(), StrName::get(prev().sv()))
  425. );
  426. }
  427. void Compiler::exprSlice0() {
  428. auto slice = make_expr<SliceExpr>();
  429. if(is_expression()){ // :<stop>
  430. EXPR();
  431. slice->stop = ctx()->s_expr.popx();
  432. // try optional step
  433. if(match(TK(":"))){ // :<stop>:<step>
  434. EXPR();
  435. slice->step = ctx()->s_expr.popx();
  436. }
  437. }else if(match(TK(":"))){
  438. if(is_expression()){ // ::<step>
  439. EXPR();
  440. slice->step = ctx()->s_expr.popx();
  441. } // else ::
  442. } // else :
  443. ctx()->s_expr.push(std::move(slice));
  444. }
  445. void Compiler::exprSlice1() {
  446. auto slice = make_expr<SliceExpr>();
  447. slice->start = ctx()->s_expr.popx();
  448. if(is_expression()){ // <start>:<stop>
  449. EXPR();
  450. slice->stop = ctx()->s_expr.popx();
  451. // try optional step
  452. if(match(TK(":"))){ // <start>:<stop>:<step>
  453. EXPR();
  454. slice->step = ctx()->s_expr.popx();
  455. }
  456. }else if(match(TK(":"))){ // <start>::<step>
  457. EXPR();
  458. slice->step = ctx()->s_expr.popx();
  459. } // else <start>:
  460. ctx()->s_expr.push(std::move(slice));
  461. }
  462. void Compiler::exprSubscr() {
  463. auto e = make_expr<SubscrExpr>();
  464. match_newlines_repl();
  465. e->a = ctx()->s_expr.popx(); // a
  466. EXPR_TUPLE(true);
  467. e->b = ctx()->s_expr.popx(); // a[<expr>]
  468. match_newlines_repl();
  469. consume(TK("]"));
  470. ctx()->s_expr.push(std::move(e));
  471. }
  472. void Compiler::exprLiteral0() {
  473. ctx()->s_expr.push(make_expr<Literal0Expr>(prev().type));
  474. }
  475. void Compiler::compile_block_body(void (Compiler::*callback)()) {
  476. if(callback == nullptr) callback = &Compiler::compile_stmt;
  477. consume(TK(":"));
  478. if(curr().type!=TK("@eol") && curr().type!=TK("@eof")){
  479. while(true){
  480. compile_stmt();
  481. bool possible = curr().type!=TK("@eol") && curr().type!=TK("@eof");
  482. if(prev().type != TK(";") || !possible) break;
  483. }
  484. return;
  485. }
  486. if(!match_newlines(mode()==REPL_MODE)){
  487. SyntaxError("expected a new line after ':'");
  488. }
  489. consume(TK("@indent"));
  490. while (curr().type != TK("@dedent")) {
  491. match_newlines();
  492. (this->*callback)();
  493. match_newlines();
  494. }
  495. consume(TK("@dedent"));
  496. }
  497. // import a [as b]
  498. // import a [as b], c [as d]
  499. void Compiler::compile_normal_import() {
  500. do {
  501. consume(TK("@id"));
  502. Str name = prev().str();
  503. ctx()->emit_(OP_IMPORT_PATH, ctx()->add_const_string(name.sv()), prev().line);
  504. if (match(TK("as"))) {
  505. consume(TK("@id"));
  506. name = prev().str();
  507. }
  508. ctx()->emit_store_name(name_scope(), StrName(name), prev().line);
  509. } while (match(TK(",")));
  510. consume_end_stmt();
  511. }
  512. // from a import b [as c], d [as e]
  513. // from a.b import c [as d]
  514. // from . import a [as b]
  515. // from .a import b [as c]
  516. // from ..a import b [as c]
  517. // from .a.b import c [as d]
  518. // from xxx import *
  519. void Compiler::compile_from_import() {
  520. int dots = 0;
  521. while(true){
  522. switch(curr().type){
  523. case TK("."): dots+=1; break;
  524. case TK(".."): dots+=2; break;
  525. case TK("..."): dots+=3; break;
  526. default: goto __EAT_DOTS_END;
  527. }
  528. advance();
  529. }
  530. __EAT_DOTS_END:
  531. SStream ss;
  532. for(int i=0; i<dots; i++) ss << '.';
  533. if(dots > 0){
  534. // @id is optional if dots > 0
  535. if(match(TK("@id"))){
  536. ss << prev().sv();
  537. while (match(TK("."))) {
  538. consume(TK("@id"));
  539. ss << "." << prev().sv();
  540. }
  541. }
  542. }else{
  543. // @id is required if dots == 0
  544. consume(TK("@id"));
  545. ss << prev().sv();
  546. while (match(TK("."))) {
  547. consume(TK("@id"));
  548. ss << "." << prev().sv();
  549. }
  550. }
  551. ctx()->emit_(OP_IMPORT_PATH, ctx()->add_const_string(ss.str().sv()), prev().line);
  552. consume(TK("import"));
  553. if (match(TK("*"))) {
  554. if(name_scope() != NAME_GLOBAL) SyntaxError("from <module> import * can only be used in global scope");
  555. // pop the module and import __all__
  556. ctx()->emit_(OP_POP_IMPORT_STAR, BC_NOARG, prev().line);
  557. consume_end_stmt();
  558. return;
  559. }
  560. do {
  561. ctx()->emit_(OP_DUP_TOP, BC_NOARG, BC_KEEPLINE);
  562. consume(TK("@id"));
  563. Str name = prev().str();
  564. ctx()->emit_(OP_LOAD_ATTR, StrName(name).index, prev().line);
  565. if (match(TK("as"))) {
  566. consume(TK("@id"));
  567. name = prev().str();
  568. }
  569. ctx()->emit_store_name(name_scope(), StrName(name), prev().line);
  570. } while (match(TK(",")));
  571. ctx()->emit_(OP_POP_TOP, BC_NOARG, BC_KEEPLINE);
  572. consume_end_stmt();
  573. }
  574. bool Compiler::is_expression(bool allow_slice){
  575. PrattCallback prefix = rules[curr().type].prefix;
  576. return prefix != nullptr && (allow_slice || curr().type!=TK(":"));
  577. }
  578. void Compiler::parse_expression(int precedence, bool allow_slice) {
  579. PrattCallback prefix = rules[curr().type].prefix;
  580. if (prefix==nullptr || (curr().type==TK(":") && !allow_slice)){
  581. SyntaxError(Str("expected an expression, got ") + TK_STR(curr().type));
  582. }
  583. advance();
  584. (this->*prefix)();
  585. while (rules[curr().type].precedence >= precedence && (allow_slice || curr().type!=TK(":"))) {
  586. TokenIndex op = curr().type;
  587. advance();
  588. PrattCallback infix = rules[op].infix;
  589. PK_ASSERT(infix != nullptr);
  590. (this->*infix)();
  591. }
  592. }
  593. void Compiler::compile_if_stmt() {
  594. EXPR(); // condition
  595. ctx()->emit_expr();
  596. int patch = ctx()->emit_(OP_POP_JUMP_IF_FALSE, BC_NOARG, prev().line);
  597. compile_block_body();
  598. if (match(TK("elif"))) {
  599. int exit_patch = ctx()->emit_(OP_JUMP_FORWARD, BC_NOARG, prev().line);
  600. ctx()->patch_jump(patch);
  601. compile_if_stmt();
  602. ctx()->patch_jump(exit_patch);
  603. } else if (match(TK("else"))) {
  604. int exit_patch = ctx()->emit_(OP_JUMP_FORWARD, BC_NOARG, prev().line);
  605. ctx()->patch_jump(patch);
  606. compile_block_body();
  607. ctx()->patch_jump(exit_patch);
  608. } else {
  609. ctx()->patch_jump(patch);
  610. }
  611. }
  612. void Compiler::compile_while_loop() {
  613. CodeBlock* block = ctx()->enter_block(CodeBlockType::WHILE_LOOP);
  614. EXPR(); // condition
  615. ctx()->emit_expr();
  616. int patch = ctx()->emit_(OP_POP_JUMP_IF_FALSE, BC_NOARG, prev().line);
  617. compile_block_body();
  618. ctx()->emit_(OP_LOOP_CONTINUE, ctx()->get_loop(), BC_KEEPLINE, true);
  619. ctx()->patch_jump(patch);
  620. ctx()->exit_block();
  621. // optional else clause
  622. if (match(TK("else"))) {
  623. compile_block_body();
  624. block->end2 = ctx()->co->codes.size();
  625. }
  626. }
  627. void Compiler::compile_for_loop() {
  628. Expr_ vars = EXPR_VARS();
  629. consume(TK("in"));
  630. EXPR_TUPLE(); ctx()->emit_expr();
  631. ctx()->emit_(OP_GET_ITER_NEW, BC_NOARG, BC_KEEPLINE);
  632. CodeBlock* block = ctx()->enter_block(CodeBlockType::FOR_LOOP);
  633. int for_codei = ctx()->emit_(OP_FOR_ITER, ctx()->curr_iblock, BC_KEEPLINE);
  634. bool ok = vars->emit_store(ctx());
  635. if(!ok) SyntaxError(); // this error occurs in `vars` instead of this line, but...nevermind
  636. ctx()->try_merge_for_iter_store(for_codei);
  637. compile_block_body();
  638. ctx()->emit_(OP_LOOP_CONTINUE, ctx()->get_loop(), BC_KEEPLINE, true);
  639. ctx()->exit_block();
  640. // optional else clause
  641. if (match(TK("else"))) {
  642. compile_block_body();
  643. block->end2 = ctx()->co->codes.size();
  644. }
  645. }
  646. void Compiler::compile_try_except() {
  647. ctx()->enter_block(CodeBlockType::TRY_EXCEPT);
  648. ctx()->emit_(OP_TRY_ENTER, BC_NOARG, prev().line);
  649. compile_block_body();
  650. small_vector_2<int, 6> patches;
  651. patches.push_back(
  652. ctx()->emit_(OP_JUMP_FORWARD, BC_NOARG, BC_KEEPLINE)
  653. );
  654. ctx()->exit_block();
  655. int finally_entry = -1;
  656. if(curr().type != TK("finally")){
  657. do {
  658. StrName as_name;
  659. consume(TK("except"));
  660. if(is_expression()){
  661. EXPR(); // push assumed type on to the stack
  662. ctx()->emit_expr();
  663. ctx()->emit_(OP_EXCEPTION_MATCH, BC_NOARG, prev().line);
  664. if(match(TK("as"))){
  665. consume(TK("@id"));
  666. as_name = StrName(prev().sv());
  667. }
  668. }else{
  669. ctx()->emit_(OP_LOAD_TRUE, BC_NOARG, BC_KEEPLINE);
  670. }
  671. int patch = ctx()->emit_(OP_POP_JUMP_IF_FALSE, BC_NOARG, BC_KEEPLINE);
  672. // on match
  673. if(!as_name.empty()){
  674. ctx()->emit_(OP_DUP_TOP, BC_NOARG, BC_KEEPLINE);
  675. ctx()->emit_store_name(name_scope(), as_name, BC_KEEPLINE);
  676. }
  677. // pop the exception
  678. ctx()->emit_(OP_POP_EXCEPTION, BC_NOARG, BC_KEEPLINE);
  679. compile_block_body();
  680. patches.push_back(ctx()->emit_(OP_JUMP_FORWARD, BC_NOARG, BC_KEEPLINE));
  681. ctx()->patch_jump(patch);
  682. }while(curr().type == TK("except"));
  683. }
  684. if(match(TK("finally"))){
  685. int patch = ctx()->emit_(OP_JUMP_FORWARD, BC_NOARG, BC_KEEPLINE);
  686. finally_entry = ctx()->co->codes.size();
  687. compile_block_body();
  688. ctx()->emit_(OP_JUMP_ABSOLUTE_TOP, BC_NOARG, BC_KEEPLINE);
  689. ctx()->patch_jump(patch);
  690. }
  691. // no match, re-raise
  692. if(finally_entry != -1){
  693. i64 target = ctx()->co->codes.size()+2;
  694. ctx()->emit_(OP_LOAD_CONST, ctx()->add_const(VAR(target)), BC_KEEPLINE);
  695. int i = ctx()->emit_(OP_JUMP_FORWARD, BC_NOARG, BC_KEEPLINE);
  696. ctx()->co->codes[i].set_signed_arg(finally_entry - i);
  697. }
  698. ctx()->emit_(OP_RE_RAISE, BC_NOARG, BC_KEEPLINE);
  699. // no exception or no match, jump to the end
  700. for (int patch : patches) ctx()->patch_jump(patch);
  701. if(finally_entry != -1){
  702. i64 target = ctx()->co->codes.size()+2;
  703. ctx()->emit_(OP_LOAD_CONST, ctx()->add_const(VAR(target)), BC_KEEPLINE);
  704. int i = ctx()->emit_(OP_JUMP_FORWARD, BC_NOARG, BC_KEEPLINE);
  705. ctx()->co->codes[i].set_signed_arg(finally_entry - i);
  706. }
  707. }
  708. void Compiler::compile_decorated(){
  709. Expr_vector decorators;
  710. do{
  711. EXPR();
  712. decorators.push_back(ctx()->s_expr.popx());
  713. if(!match_newlines_repl()) SyntaxError();
  714. }while(match(TK("@")));
  715. if(match(TK("class"))){
  716. compile_class(decorators);
  717. }else{
  718. consume(TK("def"));
  719. compile_function(decorators);
  720. }
  721. }
  722. bool Compiler::try_compile_assignment(){
  723. switch (curr().type) {
  724. case TK("+="): case TK("-="): case TK("*="): case TK("/="): case TK("//="): case TK("%="):
  725. case TK("<<="): case TK(">>="): case TK("&="): case TK("|="): case TK("^="): {
  726. Expr* lhs_p = ctx()->s_expr.top().get();
  727. if(lhs_p->is_starred()) SyntaxError();
  728. if(ctx()->is_compiling_class) SyntaxError("can't use inplace operator in class definition");
  729. advance();
  730. // a[x] += 1; a and x should be evaluated only once
  731. // a.x += 1; a should be evaluated only once
  732. auto e = make_expr<BinaryExpr>(true); // inplace=true
  733. e->op = prev().type - 1; // -1 to remove =
  734. e->lhs = ctx()->s_expr.popx();
  735. EXPR_TUPLE();
  736. e->rhs = ctx()->s_expr.popx();
  737. if(e->rhs->is_starred()) SyntaxError();
  738. e->emit_(ctx());
  739. bool ok = lhs_p->emit_store_inplace(ctx());
  740. if(!ok) SyntaxError();
  741. } return true;
  742. case TK("="): {
  743. int n = 0;
  744. while(match(TK("="))){
  745. EXPR_TUPLE();
  746. n += 1;
  747. }
  748. // stack size is n+1
  749. Expr_ val = ctx()->s_expr.popx();
  750. val->emit_(ctx());
  751. for(int j=1; j<n; j++) ctx()->emit_(OP_DUP_TOP, BC_NOARG, BC_KEEPLINE);
  752. for(int j=0; j<n; j++){
  753. auto e = ctx()->s_expr.popx();
  754. if(e->is_starred()) SyntaxError();
  755. bool ok = e->emit_store(ctx());
  756. if(!ok) SyntaxError();
  757. }
  758. } return true;
  759. default: return false;
  760. }
  761. }
  762. void Compiler::compile_stmt() {
  763. if(match(TK("class"))){
  764. compile_class();
  765. return;
  766. }
  767. advance();
  768. int kw_line = prev().line; // backup line number
  769. int curr_loop_block = ctx()->get_loop();
  770. switch(prev().type){
  771. case TK("break"):
  772. if (curr_loop_block < 0) SyntaxError("'break' outside loop");
  773. ctx()->emit_(OP_LOOP_BREAK, curr_loop_block, kw_line);
  774. consume_end_stmt();
  775. break;
  776. case TK("continue"):
  777. if (curr_loop_block < 0) SyntaxError("'continue' not properly in loop");
  778. ctx()->emit_(OP_LOOP_CONTINUE, curr_loop_block, kw_line);
  779. consume_end_stmt();
  780. break;
  781. case TK("yield"):
  782. if (contexts.size() <= 1) SyntaxError("'yield' outside function");
  783. EXPR_TUPLE(); ctx()->emit_expr();
  784. ctx()->emit_(OP_YIELD_VALUE, BC_NOARG, kw_line);
  785. consume_end_stmt();
  786. break;
  787. case TK("yield from"):
  788. if (contexts.size() <= 1) SyntaxError("'yield from' outside function");
  789. EXPR_TUPLE(); ctx()->emit_expr();
  790. ctx()->emit_(OP_GET_ITER_NEW, BC_NOARG, kw_line);
  791. ctx()->enter_block(CodeBlockType::FOR_LOOP);
  792. ctx()->emit_(OP_FOR_ITER_YIELD_VALUE, BC_NOARG, kw_line);
  793. ctx()->emit_(OP_LOOP_CONTINUE, ctx()->get_loop(), kw_line);
  794. ctx()->exit_block();
  795. consume_end_stmt();
  796. break;
  797. case TK("return"):
  798. if (contexts.size() <= 1) SyntaxError("'return' outside function");
  799. if(match_end_stmt()){
  800. ctx()->emit_(OP_RETURN_VALUE, 1, kw_line);
  801. }else{
  802. EXPR_TUPLE(); ctx()->emit_expr();
  803. consume_end_stmt();
  804. ctx()->emit_(OP_RETURN_VALUE, BC_NOARG, kw_line);
  805. }
  806. break;
  807. /*************************************************/
  808. case TK("if"): compile_if_stmt(); break;
  809. case TK("while"): compile_while_loop(); break;
  810. case TK("for"): compile_for_loop(); break;
  811. case TK("import"): compile_normal_import(); break;
  812. case TK("from"): compile_from_import(); break;
  813. case TK("def"): compile_function(); break;
  814. case TK("@"): compile_decorated(); break;
  815. case TK("try"): compile_try_except(); break;
  816. case TK("pass"): consume_end_stmt(); break;
  817. /*************************************************/
  818. case TK("++"):{
  819. consume(TK("@id"));
  820. StrName name(prev().sv());
  821. NameScope scope = name_scope();
  822. bool is_global = ctx()->global_names.count(name.sv());
  823. if(is_global) scope = NAME_GLOBAL;
  824. switch(scope){
  825. case NAME_LOCAL:
  826. ctx()->emit_(OP_INC_FAST, ctx()->add_varname(name), prev().line);
  827. break;
  828. case NAME_GLOBAL:
  829. ctx()->emit_(OP_INC_GLOBAL, name.index, prev().line);
  830. break;
  831. default: SyntaxError(); break;
  832. }
  833. consume_end_stmt();
  834. break;
  835. }
  836. case TK("--"):{
  837. consume(TK("@id"));
  838. StrName name(prev().sv());
  839. switch(name_scope()){
  840. case NAME_LOCAL:
  841. ctx()->emit_(OP_DEC_FAST, ctx()->add_varname(name), prev().line);
  842. break;
  843. case NAME_GLOBAL:
  844. ctx()->emit_(OP_DEC_GLOBAL, name.index, prev().line);
  845. break;
  846. default: SyntaxError(); break;
  847. }
  848. consume_end_stmt();
  849. break;
  850. }
  851. case TK("assert"):{
  852. EXPR(); // condition
  853. ctx()->emit_expr();
  854. int index = ctx()->emit_(OP_POP_JUMP_IF_TRUE, BC_NOARG, kw_line);
  855. int has_msg = 0;
  856. if(match(TK(","))){
  857. EXPR(); // message
  858. ctx()->emit_expr();
  859. has_msg = 1;
  860. }
  861. ctx()->emit_(OP_RAISE_ASSERT, has_msg, kw_line);
  862. ctx()->patch_jump(index);
  863. consume_end_stmt();
  864. break;
  865. }
  866. case TK("global"):
  867. do {
  868. consume(TK("@id"));
  869. ctx()->global_names.insert(prev().str());
  870. } while (match(TK(",")));
  871. consume_end_stmt();
  872. break;
  873. case TK("raise"): {
  874. EXPR(); ctx()->emit_expr();
  875. ctx()->emit_(OP_RAISE, BC_NOARG, kw_line);
  876. consume_end_stmt();
  877. } break;
  878. case TK("del"): {
  879. EXPR_TUPLE();
  880. Expr_ e = ctx()->s_expr.popx();
  881. bool ok = e->emit_del(ctx());
  882. if(!ok) SyntaxError();
  883. consume_end_stmt();
  884. } break;
  885. case TK("with"): {
  886. EXPR(); // [ <expr> ]
  887. ctx()->emit_expr();
  888. ctx()->enter_block(CodeBlockType::CONTEXT_MANAGER);
  889. Expr_ as_name;
  890. if(match(TK("as"))){
  891. consume(TK("@id"));
  892. as_name = make_expr<NameExpr>(prev().str(), name_scope());
  893. }
  894. ctx()->emit_(OP_WITH_ENTER, BC_NOARG, prev().line);
  895. // [ <expr> <expr>.__enter__() ]
  896. if(as_name != nullptr){
  897. bool ok = as_name->emit_store(ctx());
  898. if(!ok) SyntaxError();
  899. }else{
  900. ctx()->emit_(OP_POP_TOP, BC_NOARG, BC_KEEPLINE);
  901. }
  902. compile_block_body();
  903. ctx()->emit_(OP_WITH_EXIT, BC_NOARG, prev().line);
  904. ctx()->exit_block();
  905. } break;
  906. /*************************************************/
  907. case TK("=="): {
  908. consume(TK("@id"));
  909. if(mode()!=EXEC_MODE) SyntaxError("'label' is only available in EXEC_MODE");
  910. bool ok = ctx()->add_label(prev().str());
  911. consume(TK("=="));
  912. if(!ok) SyntaxError("label " + prev().str().escape() + " already exists");
  913. consume_end_stmt();
  914. } break;
  915. case TK("->"):
  916. consume(TK("@id"));
  917. if(mode()!=EXEC_MODE) SyntaxError("'goto' is only available in EXEC_MODE");
  918. ctx()->emit_(OP_GOTO, StrName(prev().sv()).index, prev().line);
  919. consume_end_stmt();
  920. break;
  921. /*************************************************/
  922. // handle dangling expression or assignment
  923. default: {
  924. advance(-1); // do revert since we have pre-called advance() at the beginning
  925. EXPR_TUPLE();
  926. bool is_typed_name = false; // e.g. x: int
  927. // eat variable's type hint if it is a single name
  928. if(ctx()->s_expr.top()->is_name()){
  929. if(match(TK(":"))){
  930. consume_type_hints();
  931. is_typed_name = true;
  932. if(ctx()->is_compiling_class){
  933. NameExpr* ne = static_cast<NameExpr*>(ctx()->s_expr.top().get());
  934. ctx()->emit_(OP_ADD_CLASS_ANNOTATION, ne->name.index, BC_KEEPLINE);
  935. }
  936. }
  937. }
  938. if(!try_compile_assignment()){
  939. if(!ctx()->s_expr.empty() && ctx()->s_expr.top()->is_starred()){
  940. SyntaxError();
  941. }
  942. if(!is_typed_name){
  943. ctx()->emit_expr();
  944. if((mode()==CELL_MODE || mode()==REPL_MODE) && name_scope()==NAME_GLOBAL){
  945. ctx()->emit_(OP_PRINT_EXPR, BC_NOARG, BC_KEEPLINE);
  946. }else{
  947. ctx()->emit_(OP_POP_TOP, BC_NOARG, BC_KEEPLINE);
  948. }
  949. }else{
  950. PK_ASSERT(ctx()->s_expr.size() == 1)
  951. ctx()->s_expr.pop();
  952. }
  953. }
  954. consume_end_stmt();
  955. }
  956. }
  957. }
  958. void Compiler::consume_type_hints(){
  959. EXPR();
  960. ctx()->s_expr.pop();
  961. }
  962. void Compiler::_add_decorators(const Expr_vector& decorators){
  963. // [obj]
  964. for(auto it=decorators.rbegin(); it!=decorators.rend(); ++it){
  965. (*it)->emit_(ctx()); // [obj, f]
  966. ctx()->emit_(OP_ROT_TWO, BC_NOARG, (*it)->line); // [f, obj]
  967. ctx()->emit_(OP_LOAD_NULL, BC_NOARG, BC_KEEPLINE); // [f, obj, NULL]
  968. ctx()->emit_(OP_ROT_TWO, BC_NOARG, BC_KEEPLINE); // [obj, NULL, f]
  969. ctx()->emit_(OP_CALL, 1, (*it)->line); // [obj]
  970. }
  971. }
  972. void Compiler::compile_class(const Expr_vector& decorators){
  973. consume(TK("@id"));
  974. int namei = StrName(prev().sv()).index;
  975. Expr_ base = nullptr;
  976. if(match(TK("("))){
  977. if(is_expression()){
  978. EXPR();
  979. base = ctx()->s_expr.popx();
  980. }
  981. consume(TK(")"));
  982. }
  983. if(base == nullptr){
  984. ctx()->emit_(OP_LOAD_NONE, BC_NOARG, prev().line);
  985. }else {
  986. base->emit_(ctx());
  987. }
  988. ctx()->emit_(OP_BEGIN_CLASS, namei, BC_KEEPLINE);
  989. for(auto& c: this->contexts.container()){
  990. if(c.is_compiling_class){
  991. SyntaxError("nested class is not allowed");
  992. }
  993. }
  994. ctx()->is_compiling_class = true;
  995. compile_block_body();
  996. ctx()->is_compiling_class = false;
  997. if(!decorators.empty()){
  998. ctx()->emit_(OP_BEGIN_CLASS_DECORATION, BC_NOARG, BC_KEEPLINE);
  999. _add_decorators(decorators);
  1000. ctx()->emit_(OP_END_CLASS_DECORATION, BC_NOARG, BC_KEEPLINE);
  1001. }
  1002. ctx()->emit_(OP_END_CLASS, namei, BC_KEEPLINE);
  1003. }
  1004. void Compiler::_compile_f_args(FuncDecl_ decl, bool enable_type_hints){
  1005. int state = 0; // 0 for args, 1 for *args, 2 for k=v, 3 for **kwargs
  1006. do {
  1007. if(state > 3) SyntaxError();
  1008. if(state == 3) SyntaxError("**kwargs should be the last argument");
  1009. match_newlines();
  1010. if(match(TK("*"))){
  1011. if(state < 1) state = 1;
  1012. else SyntaxError("*args should be placed before **kwargs");
  1013. }
  1014. else if(match(TK("**"))){
  1015. state = 3;
  1016. }
  1017. consume(TK("@id"));
  1018. StrName name = prev().str();
  1019. // check duplicate argument name
  1020. for(int j: decl->args){
  1021. if(decl->code->varnames[j] == name) {
  1022. SyntaxError("duplicate argument name");
  1023. }
  1024. }
  1025. for(auto& kv: decl->kwargs){
  1026. if(decl->code->varnames[kv.index] == name){
  1027. SyntaxError("duplicate argument name");
  1028. }
  1029. }
  1030. if(decl->starred_arg!=-1 && decl->code->varnames[decl->starred_arg] == name){
  1031. SyntaxError("duplicate argument name");
  1032. }
  1033. if(decl->starred_kwarg!=-1 && decl->code->varnames[decl->starred_kwarg] == name){
  1034. SyntaxError("duplicate argument name");
  1035. }
  1036. // eat type hints
  1037. if(enable_type_hints && match(TK(":"))) consume_type_hints();
  1038. if(state == 0 && curr().type == TK("=")) state = 2;
  1039. int index = ctx()->add_varname(name);
  1040. switch (state)
  1041. {
  1042. case 0:
  1043. decl->args.push_back(index);
  1044. break;
  1045. case 1:
  1046. decl->starred_arg = index;
  1047. state+=1;
  1048. break;
  1049. case 2: {
  1050. consume(TK("="));
  1051. PyVar value = read_literal();
  1052. if(value == nullptr){
  1053. SyntaxError(Str("default argument must be a literal"));
  1054. }
  1055. decl->add_kwarg(index, name, value);
  1056. } break;
  1057. case 3:
  1058. decl->starred_kwarg = index;
  1059. state+=1;
  1060. break;
  1061. }
  1062. } while (match(TK(",")));
  1063. }
  1064. void Compiler::compile_function(const Expr_vector& decorators){
  1065. consume(TK("@id"));
  1066. Str decl_name = prev().str();
  1067. FuncDecl_ decl = push_f_context(decl_name);
  1068. consume(TK("("));
  1069. if (!match(TK(")"))) {
  1070. _compile_f_args(decl, true);
  1071. consume(TK(")"));
  1072. }
  1073. if(match(TK("->"))) consume_type_hints();
  1074. compile_block_body();
  1075. pop_context();
  1076. decl->docstring = nullptr;
  1077. if(decl->code->codes.size()>=2 && decl->code->codes[0].op == OP_LOAD_CONST && decl->code->codes[1].op == OP_POP_TOP){
  1078. PyVar c = decl->code->consts[decl->code->codes[0].arg];
  1079. if(is_type(c, vm->tp_str)){
  1080. decl->code->codes[0].op = OP_NO_OP;
  1081. decl->code->codes[1].op = OP_NO_OP;
  1082. decl->docstring = PK_OBJ_GET(Str, c).c_str();
  1083. }
  1084. }
  1085. ctx()->emit_(OP_LOAD_FUNCTION, ctx()->add_func_decl(decl), prev().line);
  1086. _add_decorators(decorators);
  1087. if(!ctx()->is_compiling_class){
  1088. auto e = make_expr<NameExpr>(decl_name, name_scope());
  1089. e->emit_store(ctx());
  1090. }else{
  1091. int index = StrName(decl_name).index;
  1092. ctx()->emit_(OP_STORE_CLASS_ATTR, index, prev().line);
  1093. }
  1094. }
  1095. PyVar Compiler::to_object(const TokenValue& value){
  1096. PyVar obj = nullptr;
  1097. if(std::holds_alternative<i64>(value)){
  1098. obj = VAR(std::get<i64>(value));
  1099. }
  1100. if(std::holds_alternative<f64>(value)){
  1101. obj = VAR(std::get<f64>(value));
  1102. }
  1103. if(std::holds_alternative<Str>(value)){
  1104. obj = VAR(std::get<Str>(value));
  1105. }
  1106. PK_ASSERT(obj != nullptr)
  1107. return obj;
  1108. }
  1109. PyVar Compiler::read_literal(){
  1110. advance();
  1111. switch(prev().type){
  1112. case TK("-"): {
  1113. consume(TK("@num"));
  1114. PyVar val = to_object(prev().value);
  1115. return vm->py_negate(val);
  1116. }
  1117. case TK("@num"): return to_object(prev().value);
  1118. case TK("@str"): return to_object(prev().value);
  1119. case TK("True"): return VAR(true);
  1120. case TK("False"): return VAR(false);
  1121. case TK("None"): return vm->None;
  1122. case TK("..."): return vm->Ellipsis;
  1123. case TK("("): {
  1124. List cpnts;
  1125. while(true) {
  1126. cpnts.push_back(read_literal());
  1127. if(curr().type == TK(")")) break;
  1128. consume(TK(","));
  1129. if(curr().type == TK(")")) break;
  1130. }
  1131. consume(TK(")"));
  1132. return VAR(cpnts.to_tuple());
  1133. }
  1134. default: break;
  1135. }
  1136. return nullptr;
  1137. }
  1138. Compiler::Compiler(VM* vm, std::string_view source, const Str& filename, CompileMode mode, bool unknown_global_scope)
  1139. :lexer(vm, std::make_shared<SourceData>(source, filename, mode)){
  1140. this->vm = vm;
  1141. this->unknown_global_scope = unknown_global_scope;
  1142. init_pratt_rules();
  1143. }
  1144. Str Compiler::precompile(){
  1145. auto tokens = lexer.run();
  1146. SStream ss;
  1147. ss << "pkpy:" PK_VERSION << '\n'; // L1: version string
  1148. ss << (int)mode() << '\n'; // L2: mode
  1149. std::map<std::string_view, int> token_indices;
  1150. for(auto token: tokens){
  1151. if(is_raw_string_used(token.type)){
  1152. auto it = token_indices.find(token.sv());
  1153. if(it == token_indices.end()){
  1154. token_indices[token.sv()] = 0;
  1155. // assert no '\n' in token.sv()
  1156. for(char c: token.sv()) if(c=='\n') PK_FATAL_ERROR();
  1157. }
  1158. }
  1159. }
  1160. ss << "=" << (int)token_indices.size() << '\n'; // L3: raw string count
  1161. int index = 0;
  1162. for(auto& kv: token_indices){
  1163. ss << kv.first << '\n'; // L4: raw strings
  1164. kv.second = index++;
  1165. }
  1166. ss << "=" << (int)tokens.size() << '\n'; // L5: token count
  1167. for(int i=0; i<tokens.size(); i++){
  1168. const Token& token = tokens[i];
  1169. ss << (int)token.type << ',';
  1170. if(is_raw_string_used(token.type)){
  1171. ss << token_indices[token.sv()] << ',';
  1172. }
  1173. if(i>0 && tokens[i-1].line == token.line) ss << ',';
  1174. else ss << token.line << ',';
  1175. if(i>0 && tokens[i-1].brackets_level == token.brackets_level) ss << ',';
  1176. else ss << token.brackets_level << ',';
  1177. // visit token value
  1178. std::visit([&ss](auto&& arg){
  1179. using T = std::decay_t<decltype(arg)>;
  1180. if constexpr(std::is_same_v<T, i64>){
  1181. ss << 'I' << arg;
  1182. }else if constexpr(std::is_same_v<T, f64>){
  1183. ss << 'F' << arg;
  1184. }else if constexpr(std::is_same_v<T, Str>){
  1185. ss << 'S';
  1186. for(char c: arg) ss.write_hex((unsigned char)c);
  1187. }
  1188. ss << '\n';
  1189. }, token.value);
  1190. }
  1191. return ss.str();
  1192. }
  1193. void Compiler::from_precompiled(const char* source){
  1194. TokenDeserializer deserializer(source);
  1195. deserializer.curr += 5; // skip "pkpy:"
  1196. std::string_view version = deserializer.read_string('\n');
  1197. if(version != PK_VERSION){
  1198. Str error = _S("precompiled version mismatch: ", version, "!=" PK_VERSION);
  1199. throw std::runtime_error(error.c_str());
  1200. }
  1201. if(deserializer.read_uint('\n') != (i64)mode()){
  1202. throw std::runtime_error("precompiled mode mismatch");
  1203. }
  1204. int count = deserializer.read_count();
  1205. vector<Str>& precompiled_tokens = lexer.src->_precompiled_tokens;
  1206. for(int i=0; i<count; i++){
  1207. precompiled_tokens.push_back(deserializer.read_string('\n'));
  1208. }
  1209. count = deserializer.read_count();
  1210. for(int i=0; i<count; i++){
  1211. Token t;
  1212. t.type = (unsigned char)deserializer.read_uint(',');
  1213. if(is_raw_string_used(t.type)){
  1214. i64 index = deserializer.read_uint(',');
  1215. t.start = precompiled_tokens[index].c_str();
  1216. t.length = precompiled_tokens[index].size;
  1217. }else{
  1218. t.start = nullptr;
  1219. t.length = 0;
  1220. }
  1221. if(deserializer.match_char(',')){
  1222. t.line = tokens.back().line;
  1223. }else{
  1224. t.line = (int)deserializer.read_uint(',');
  1225. }
  1226. if(deserializer.match_char(',')){
  1227. t.brackets_level = tokens.back().brackets_level;
  1228. }else{
  1229. t.brackets_level = (int)deserializer.read_uint(',');
  1230. }
  1231. char type = deserializer.read_char();
  1232. switch(type){
  1233. case 'I': t.value = deserializer.read_uint('\n'); break;
  1234. case 'F': t.value = deserializer.read_float('\n'); break;
  1235. case 'S':
  1236. t.value = deserializer.read_string_from_hex('\n');
  1237. break;
  1238. default: t.value = {}; break;
  1239. }
  1240. tokens.push_back(t);
  1241. }
  1242. }
  1243. CodeObject_ Compiler::compile(){
  1244. PK_ASSERT(i == 0) // make sure it is the first time to compile
  1245. if(lexer.src->is_precompiled){
  1246. from_precompiled(lexer.src->source.c_str());
  1247. }else{
  1248. this->tokens = lexer.run();
  1249. }
  1250. CodeObject_ code = push_global_context();
  1251. advance(); // skip @sof, so prev() is always valid
  1252. match_newlines(); // skip possible leading '\n'
  1253. if(mode()==EVAL_MODE) {
  1254. EXPR_TUPLE(); ctx()->emit_expr();
  1255. consume(TK("@eof"));
  1256. ctx()->emit_(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  1257. pop_context();
  1258. return code;
  1259. }else if(mode()==JSON_MODE){
  1260. EXPR();
  1261. Expr_ e = ctx()->s_expr.popx();
  1262. if(!e->is_json_object()) SyntaxError("expect a JSON object, literal or array");
  1263. consume(TK("@eof"));
  1264. e->emit_(ctx());
  1265. ctx()->emit_(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  1266. pop_context();
  1267. return code;
  1268. }
  1269. while (!match(TK("@eof"))) {
  1270. compile_stmt();
  1271. match_newlines();
  1272. }
  1273. pop_context();
  1274. return code;
  1275. }
  1276. // TODO: refactor this
  1277. void Lexer::throw_err(StrName type, Str msg, int lineno, const char* cursor){
  1278. PyVar e_obj = vm->call(vm->builtins->attr(type), VAR(msg));
  1279. Exception& e = PK_OBJ_GET(Exception, e_obj);
  1280. e.st_push(src, lineno, cursor, "");
  1281. throw std::move(e);
  1282. }
  1283. std::string_view TokenDeserializer::read_string(char c){
  1284. const char* start = curr;
  1285. while(*curr != c) curr++;
  1286. std::string_view retval(start, curr-start);
  1287. curr++; // skip the delimiter
  1288. return retval;
  1289. }
  1290. Str TokenDeserializer::read_string_from_hex(char c){
  1291. std::string_view s = read_string(c);
  1292. char* buffer = (char*)pool128_alloc(s.size()/2 + 1);
  1293. for(int i=0; i<s.size(); i+=2){
  1294. char c = 0;
  1295. if(s[i]>='0' && s[i]<='9') c += s[i]-'0';
  1296. else if(s[i]>='a' && s[i]<='f') c += s[i]-'a'+10;
  1297. else PK_FATAL_ERROR();
  1298. c <<= 4;
  1299. if(s[i+1]>='0' && s[i+1]<='9') c += s[i+1]-'0';
  1300. else if(s[i+1]>='a' && s[i+1]<='f') c += s[i+1]-'a'+10;
  1301. else PK_FATAL_ERROR();
  1302. buffer[i/2] = c;
  1303. }
  1304. buffer[s.size()/2] = 0;
  1305. return std::pair<char*, int>(buffer, s.size()/2);
  1306. }
  1307. int TokenDeserializer::read_count(){
  1308. PK_ASSERT(*curr == '=')
  1309. curr++;
  1310. return read_uint('\n');
  1311. }
  1312. i64 TokenDeserializer::read_uint(char c){
  1313. i64 out = 0;
  1314. while(*curr != c){
  1315. out = out*10 + (*curr-'0');
  1316. curr++;
  1317. }
  1318. curr++; // skip the delimiter
  1319. return out;
  1320. }
  1321. f64 TokenDeserializer::read_float(char c){
  1322. std::string_view sv = read_string(c);
  1323. return std::stod(std::string(sv));
  1324. }
  1325. } // namespace pkpy