1
0

compiler.h 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913
  1. #pragma once
  2. #include "expr.h"
  3. namespace pkpy{
  4. class Compiler;
  5. typedef void (Compiler::*PrattCallback)();
  6. struct PrattRule{
  7. PrattCallback prefix;
  8. PrattCallback infix;
  9. Precedence precedence;
  10. };
  11. class Compiler {
  12. inline static PrattRule rules[kTokenCount];
  13. std::unique_ptr<Lexer> lexer;
  14. stack<CodeEmitContext> contexts;
  15. VM* vm;
  16. bool used;
  17. // for parsing token stream
  18. int i = 0;
  19. std::vector<Token> tokens;
  20. const Token& prev() { return tokens.at(i-1); }
  21. const Token& curr() { return tokens.at(i); }
  22. const Token& next() { return tokens.at(i+1); }
  23. void advance(int delta=1) { i += delta; }
  24. CodeEmitContext* ctx() { return &contexts.top(); }
  25. CompileMode mode() const{ return lexer->src->mode; }
  26. NameScope name_scope() const { return contexts.size()>1 ? NAME_LOCAL : NAME_GLOBAL; }
  27. template<typename... Args>
  28. CodeObject_ push_context(Args&&... args){
  29. CodeObject_ co = make_sp<CodeObject>(std::forward<Args>(args)...);
  30. contexts.push(CodeEmitContext(vm, co));
  31. return co;
  32. }
  33. void pop_context(){
  34. if(!ctx()->s_expr.empty()) UNREACHABLE();
  35. // if the last op does not return, add a default return None
  36. if(ctx()->co->codes.empty() || ctx()->co->codes.back().op != OP_RETURN_VALUE){
  37. ctx()->emit(OP_LOAD_NONE, BC_NOARG, BC_KEEPLINE);
  38. ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  39. }
  40. ctx()->co->optimize(vm);
  41. contexts.pop();
  42. }
  43. static void init_pratt_rules(){
  44. // http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
  45. #define METHOD(name) &Compiler::name
  46. #define NO_INFIX nullptr, PREC_NONE
  47. for(TokenIndex i=0; i<kTokenCount; i++) rules[i] = { nullptr, NO_INFIX };
  48. rules[TK(".")] = { nullptr, METHOD(exprAttrib), PREC_ATTRIB };
  49. rules[TK("(")] = { METHOD(exprGroup), METHOD(exprCall), PREC_CALL };
  50. rules[TK("[")] = { METHOD(exprList), METHOD(exprSubscr), PREC_SUBSCRIPT };
  51. rules[TK("{")] = { METHOD(exprMap), NO_INFIX };
  52. rules[TK("%")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  53. rules[TK("+")] = { nullptr, METHOD(exprBinaryOp), PREC_TERM };
  54. rules[TK("-")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_TERM };
  55. rules[TK("*")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_FACTOR };
  56. rules[TK("/")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  57. rules[TK("//")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  58. rules[TK("**")] = { nullptr, METHOD(exprBinaryOp), PREC_EXPONENT };
  59. rules[TK(">")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  60. rules[TK("<")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  61. rules[TK("==")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY };
  62. rules[TK("!=")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY };
  63. rules[TK(">=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  64. rules[TK("<=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  65. rules[TK("in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  66. rules[TK("is")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  67. rules[TK("<<")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  68. rules[TK(">>")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  69. rules[TK("&")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_AND };
  70. rules[TK("|")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_OR };
  71. rules[TK("^")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_XOR };
  72. rules[TK("?")] = { nullptr, METHOD(exprTernary), PREC_TERNARY };
  73. rules[TK(",")] = { nullptr, METHOD(exprTuple), PREC_TUPLE };
  74. rules[TK("not in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  75. rules[TK("is not")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  76. rules[TK("and") ] = { nullptr, METHOD(exprAnd), PREC_LOGICAL_AND };
  77. rules[TK("or")] = { nullptr, METHOD(exprOr), PREC_LOGICAL_OR };
  78. rules[TK("not")] = { METHOD(exprNot), nullptr, PREC_LOGICAL_NOT };
  79. rules[TK("True")] = { METHOD(exprLiteral0), NO_INFIX };
  80. rules[TK("False")] = { METHOD(exprLiteral0), NO_INFIX };
  81. rules[TK("None")] = { METHOD(exprLiteral0), NO_INFIX };
  82. rules[TK("...")] = { METHOD(exprLiteral0), NO_INFIX };
  83. rules[TK("lambda")] = { METHOD(exprLambda), NO_INFIX };
  84. rules[TK("@id")] = { METHOD(exprName), NO_INFIX };
  85. rules[TK("@num")] = { METHOD(exprLiteral), NO_INFIX };
  86. rules[TK("@str")] = { METHOD(exprLiteral), NO_INFIX };
  87. rules[TK("@fstr")] = { METHOD(exprFString), NO_INFIX };
  88. #undef METHOD
  89. #undef NO_INFIX
  90. }
  91. bool match(TokenIndex expected) {
  92. if (curr().type != expected) return false;
  93. advance();
  94. return true;
  95. }
  96. void consume(TokenIndex expected) {
  97. if (!match(expected)){
  98. StrStream ss;
  99. ss << "expected '" << TK_STR(expected) << "', but got '" << TK_STR(curr().type) << "'";
  100. SyntaxError(ss.str());
  101. }
  102. }
  103. bool match_newlines_repl(){
  104. return match_newlines(mode()==REPL_MODE);
  105. }
  106. bool match_newlines(bool repl_throw=false) {
  107. bool consumed = false;
  108. if (curr().type == TK("@eol")) {
  109. while (curr().type == TK("@eol")) advance();
  110. consumed = true;
  111. }
  112. if (repl_throw && curr().type == TK("@eof")){
  113. throw NeedMoreLines(ctx()->is_compiling_class);
  114. }
  115. return consumed;
  116. }
  117. bool match_end_stmt() {
  118. if (match(TK(";"))) { match_newlines(); return true; }
  119. if (match_newlines() || curr().type == TK("@eof")) return true;
  120. if (curr().type == TK("@dedent")) return true;
  121. return false;
  122. }
  123. void consume_end_stmt() {
  124. if (!match_end_stmt()) SyntaxError("expected statement end");
  125. }
  126. /*************************************************/
  127. void EXPR(bool push_stack=true) {
  128. parse_expression(PREC_TUPLE+1, push_stack);
  129. }
  130. void EXPR_TUPLE(bool push_stack=true) {
  131. parse_expression(PREC_TUPLE, push_stack);
  132. }
  133. template <typename T, typename... Args>
  134. std::unique_ptr<T> make_expr(Args&&... args) {
  135. std::unique_ptr<T> expr = std::make_unique<T>(std::forward<Args>(args)...);
  136. expr->line = prev().line;
  137. return expr;
  138. }
  139. // PASS
  140. void exprLiteral(){
  141. ctx()->s_expr.push(make_expr<LiteralExpr>(prev().value));
  142. }
  143. // PASS
  144. void exprFString(){
  145. ctx()->s_expr.push(make_expr<FStringExpr>(std::get<Str>(prev().value)));
  146. }
  147. // PASS
  148. void exprLambda(){
  149. auto e = make_expr<LambdaExpr>();
  150. e->decl.name = "<lambda>";
  151. e->scope = name_scope();
  152. if(!match(TK(":"))){
  153. _compile_f_args(e->decl, false);
  154. consume(TK(":"));
  155. }
  156. e->decl.code = push_context(lexer->src, "<lambda>");
  157. EXPR(false); // https://github.com/blueloveTH/pocketpy/issues/37
  158. ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  159. pop_context();
  160. ctx()->s_expr.push(std::move(e));
  161. }
  162. // PASS
  163. void exprTuple(){
  164. std::vector<Expr_> items;
  165. do {
  166. EXPR(); // NOTE: "1," will fail, "1,2" will be ok
  167. items.push_back(ctx()->s_expr.popx());
  168. } while(match(TK(",")));
  169. ctx()->s_expr.push(make_expr<TupleExpr>(
  170. std::move(items)
  171. ));
  172. }
  173. // PASS
  174. void exprOr(){
  175. auto e = make_expr<OrExpr>();
  176. e->lhs = ctx()->s_expr.popx();
  177. parse_expression(PREC_LOGICAL_OR + 1);
  178. e->rhs = ctx()->s_expr.popx();
  179. ctx()->s_expr.push(std::move(e));
  180. }
  181. // PASS
  182. void exprAnd(){
  183. auto e = make_expr<AndExpr>();
  184. e->lhs = ctx()->s_expr.popx();
  185. parse_expression(PREC_LOGICAL_AND + 1);
  186. e->rhs = ctx()->s_expr.popx();
  187. ctx()->s_expr.push(std::move(e));
  188. }
  189. // PASS
  190. void exprTernary(){
  191. auto e = make_expr<TernaryExpr>();
  192. e->cond = ctx()->s_expr.popx();
  193. EXPR(); // if true
  194. e->true_expr = ctx()->s_expr.popx();
  195. consume(TK(":"));
  196. EXPR(); // if false
  197. e->false_expr = ctx()->s_expr.popx();
  198. ctx()->s_expr.push(std::move(e));
  199. }
  200. // PASS
  201. void exprBinaryOp(){
  202. auto e = make_expr<BinaryExpr>();
  203. e->op = prev().type;
  204. e->lhs = ctx()->s_expr.popx();
  205. parse_expression(rules[e->op].precedence + 1);
  206. e->rhs = ctx()->s_expr.popx();
  207. ctx()->s_expr.push(std::move(e));
  208. }
  209. // PASS
  210. void exprNot() {
  211. parse_expression(PREC_LOGICAL_NOT + 1);
  212. ctx()->s_expr.push(make_expr<NotExpr>(ctx()->s_expr.popx()));
  213. }
  214. // PASS
  215. void exprUnaryOp(){
  216. TokenIndex op = prev().type;
  217. parse_expression(PREC_UNARY + 1);
  218. switch(op){
  219. case TK("-"):
  220. ctx()->s_expr.push(make_expr<NegatedExpr>(ctx()->s_expr.popx()));
  221. break;
  222. case TK("*"):
  223. ctx()->s_expr.push(make_expr<StarredExpr>(ctx()->s_expr.popx()));
  224. break;
  225. default: UNREACHABLE();
  226. }
  227. }
  228. // PASS
  229. void exprGroup(){
  230. match_newlines_repl();
  231. EXPR_TUPLE(); // () is just for change precedence
  232. match_newlines_repl();
  233. consume(TK(")"));
  234. }
  235. // PASS
  236. template<typename T>
  237. void _consume_comp(Expr_ expr){
  238. static_assert(std::is_base_of<CompExpr, T>::value);
  239. std::unique_ptr<CompExpr> ce = std::make_unique<T>();
  240. ce->expr = std::move(expr);
  241. EXPR_TUPLE(); // must be a lvalue
  242. ce->vars = ctx()->s_expr.popx();
  243. consume(TK("in"));
  244. EXPR();
  245. ce->iter = ctx()->s_expr.popx();
  246. match_newlines_repl();
  247. if(match(TK("if"))){
  248. EXPR();
  249. ce->cond = ctx()->s_expr.popx();
  250. }
  251. ctx()->s_expr.push(std::move(ce));
  252. match_newlines_repl();
  253. }
  254. // PASS
  255. void exprList() {
  256. int line = prev().line;
  257. std::vector<Expr_> items;
  258. do {
  259. match_newlines_repl();
  260. if (curr().type == TK("]")) break;
  261. EXPR();
  262. items.push_back(ctx()->s_expr.popx());
  263. match_newlines_repl();
  264. if(items.size()==1 && match(TK("for"))){
  265. _consume_comp<ListCompExpr>(std::move(items[0]));
  266. consume(TK("]"));
  267. return;
  268. }
  269. match_newlines_repl();
  270. } while (match(TK(",")));
  271. consume(TK("]"));
  272. auto e = make_expr<ListExpr>(std::move(items));
  273. e->line = line; // override line
  274. ctx()->s_expr.push(std::move(e));
  275. }
  276. // PASS
  277. void exprMap() {
  278. bool parsing_dict = false; // {...} may be dict or set
  279. std::vector<Expr_> items;
  280. do {
  281. match_newlines_repl();
  282. if (curr().type == TK("}")) break;
  283. EXPR();
  284. if(curr().type == TK(":")) parsing_dict = true;
  285. if(parsing_dict){
  286. consume(TK(":"));
  287. EXPR();
  288. auto dict_item = make_expr<DictItemExpr>();
  289. dict_item->key = ctx()->s_expr.popx();
  290. dict_item->value = ctx()->s_expr.popx();
  291. items.push_back(std::move(dict_item));
  292. }else{
  293. items.push_back(ctx()->s_expr.popx());
  294. }
  295. match_newlines_repl();
  296. if(items.size()==1 && match(TK("for"))){
  297. if(parsing_dict) _consume_comp<DictCompExpr>(std::move(items[0]));
  298. else _consume_comp<SetCompExpr>(std::move(items[0]));
  299. consume(TK("}"));
  300. return;
  301. }
  302. match_newlines_repl();
  303. } while (match(TK(",")));
  304. consume(TK("}"));
  305. if(items.size()==0 || parsing_dict){
  306. auto e = make_expr<DictExpr>(std::move(items));
  307. ctx()->s_expr.push(std::move(e));
  308. }else{
  309. auto e = make_expr<SetExpr>(std::move(items));
  310. ctx()->s_expr.push(std::move(e));
  311. }
  312. }
  313. // PASS
  314. void exprCall() {
  315. auto e = make_expr<CallExpr>();
  316. e->callable = ctx()->s_expr.popx();
  317. do {
  318. match_newlines_repl();
  319. if (curr().type==TK(")")) break;
  320. if(curr().type==TK("@id") && next().type==TK("=")) {
  321. consume(TK("@id"));
  322. Str key = prev().str();
  323. consume(TK("="));
  324. EXPR();
  325. e->kwargs.push_back({key, ctx()->s_expr.popx()});
  326. } else{
  327. if(!e->kwargs.empty()) SyntaxError("positional argument follows keyword argument");
  328. EXPR();
  329. e->args.push_back(ctx()->s_expr.popx());
  330. }
  331. match_newlines_repl();
  332. } while (match(TK(",")));
  333. consume(TK(")"));
  334. if(e->args.size() > 32767) SyntaxError("too many positional arguments");
  335. if(e->kwargs.size() > 32767) SyntaxError("too many keyword arguments");
  336. ctx()->s_expr.push(std::move(e));
  337. }
  338. // PASS
  339. void exprName(){
  340. ctx()->s_expr.push(make_expr<NameExpr>(prev().str(), name_scope()));
  341. }
  342. // PASS
  343. void exprAttrib() {
  344. consume(TK("@id"));
  345. ctx()->s_expr.push(
  346. make_expr<AttribExpr>(ctx()->s_expr.popx(), prev().str())
  347. );
  348. }
  349. // PASS
  350. void exprSubscr() {
  351. auto e = make_expr<SubscrExpr>();
  352. std::vector<Expr_> items;
  353. do {
  354. EXPR_TUPLE();
  355. items.push_back(ctx()->s_expr.popx());
  356. } while(match(TK(":")));
  357. consume(TK("]"));
  358. switch(items.size()){
  359. case 1:
  360. e->b = std::move(items[0]);
  361. break;
  362. case 2: case 3: {
  363. auto slice = make_expr<SliceExpr>();
  364. slice->start = std::move(items[0]);
  365. slice->stop = std::move(items[1]);
  366. if(items.size()==3){
  367. slice->step = std::move(items[2]);
  368. }
  369. e->b = std::move(slice);
  370. } break;
  371. default: SyntaxError(); break;
  372. }
  373. ctx()->s_expr.push(std::move(e));
  374. }
  375. // PASS
  376. void exprLiteral0() {
  377. ctx()->s_expr.push(make_expr<Literal0Expr>(prev().type));
  378. }
  379. void compile_block_body() {
  380. consume(TK(":"));
  381. if(curr().type!=TK("@eol") && curr().type!=TK("@eof")){
  382. compile_stmt(); // inline block
  383. return;
  384. }
  385. if(!match_newlines(mode()==REPL_MODE)){
  386. SyntaxError("expected a new line after ':'");
  387. }
  388. consume(TK("@indent"));
  389. while (curr().type != TK("@dedent")) {
  390. match_newlines();
  391. compile_stmt();
  392. match_newlines();
  393. }
  394. consume(TK("@dedent"));
  395. }
  396. Str _compile_import() {
  397. consume(TK("@id"));
  398. Str name = prev().str();
  399. int index = ctx()->add_name(name);
  400. ctx()->emit(OP_IMPORT_NAME, index, prev().line);
  401. return name;
  402. }
  403. // import a as b
  404. void compile_normal_import() {
  405. do {
  406. Str name = _compile_import();
  407. if (match(TK("as"))) {
  408. consume(TK("@id"));
  409. name = prev().str();
  410. }
  411. int index = ctx()->add_name(name);
  412. auto op = name_scope()==NAME_LOCAL ? OP_STORE_LOCAL : OP_STORE_GLOBAL;
  413. ctx()->emit(op, index, prev().line);
  414. } while (match(TK(",")));
  415. consume_end_stmt();
  416. }
  417. // from a import b as c, d as e
  418. void compile_from_import() {
  419. _compile_import();
  420. consume(TK("import"));
  421. if (match(TK("*"))) {
  422. if(name_scope() != NAME_GLOBAL) SyntaxError("import * should be used in global scope");
  423. ctx()->emit(OP_IMPORT_STAR, BC_NOARG, prev().line);
  424. consume_end_stmt();
  425. return;
  426. }
  427. do {
  428. ctx()->emit(OP_DUP_TOP, BC_NOARG, BC_KEEPLINE);
  429. consume(TK("@id"));
  430. Str name = prev().str();
  431. int index = ctx()->add_name(name);
  432. ctx()->emit(OP_LOAD_ATTR, index, prev().line);
  433. if (match(TK("as"))) {
  434. consume(TK("@id"));
  435. name = prev().str();
  436. }
  437. index = ctx()->add_name(name);
  438. auto op = name_scope()==NAME_LOCAL ? OP_STORE_LOCAL : OP_STORE_GLOBAL;
  439. ctx()->emit(op, index, prev().line);
  440. } while (match(TK(",")));
  441. ctx()->emit(OP_POP_TOP, BC_NOARG, BC_KEEPLINE);
  442. consume_end_stmt();
  443. }
  444. void parse_expression(int precedence, bool push_stack=true) {
  445. advance();
  446. PrattCallback prefix = rules[prev().type].prefix;
  447. if (prefix == nullptr) SyntaxError(Str("expected an expression, but got ") + TK_STR(prev().type));
  448. (this->*prefix)();
  449. while (rules[curr().type].precedence >= precedence) {
  450. TokenIndex op = curr().type;
  451. advance();
  452. PrattCallback infix = rules[op].infix;
  453. if(infix == nullptr) throw std::runtime_error("(infix == nullptr) is true");
  454. (this->*infix)();
  455. }
  456. if(!push_stack) ctx()->emit_expr();
  457. }
  458. // PASS
  459. void compile_if_stmt() {
  460. EXPR(false); // condition
  461. int patch = ctx()->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, prev().line);
  462. compile_block_body();
  463. if (match(TK("elif"))) {
  464. int exit_patch = ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, prev().line);
  465. ctx()->patch_jump(patch);
  466. compile_if_stmt();
  467. ctx()->patch_jump(exit_patch);
  468. } else if (match(TK("else"))) {
  469. int exit_patch = ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, prev().line);
  470. ctx()->patch_jump(patch);
  471. compile_block_body();
  472. ctx()->patch_jump(exit_patch);
  473. } else {
  474. ctx()->patch_jump(patch);
  475. }
  476. }
  477. // PASS
  478. void compile_while_loop() {
  479. ctx()->enter_block(WHILE_LOOP);
  480. EXPR(false); // condition
  481. int patch = ctx()->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, prev().line);
  482. compile_block_body();
  483. ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, BC_KEEPLINE);
  484. ctx()->patch_jump(patch);
  485. ctx()->exit_block();
  486. }
  487. // PASS
  488. void compile_for_loop() {
  489. EXPR_TUPLE();
  490. Expr_ vars = ctx()->s_expr.popx();
  491. consume(TK("in"));
  492. EXPR(false);
  493. ctx()->emit(OP_GET_ITER, BC_NOARG, BC_KEEPLINE);
  494. ctx()->enter_block(FOR_LOOP);
  495. ctx()->emit(OP_FOR_ITER, BC_NOARG, BC_KEEPLINE);
  496. bool ok = vars->emit_store(ctx());
  497. if(!ok) SyntaxError(); // this error occurs in `vars` instead of this line, but...nevermind
  498. compile_block_body();
  499. ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, BC_KEEPLINE);
  500. ctx()->exit_block();
  501. }
  /// Handler for the `try` keyword. Currently a no-op: nothing is parsed or
  /// emitted. The disabled draft of the implementation is kept below.
  void compile_try_except() {
      // ctx()->enter_block(TRY_EXCEPT);
      // ctx()->emit(OP_TRY_BLOCK_ENTER, BC_NOARG, prev().line);
      // compile_block_body();
      // ctx()->emit(OP_TRY_BLOCK_EXIT, BC_NOARG, BC_KEEPLINE);
      // std::vector<int> patches = {
      //     ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, BC_KEEPLINE)
      // };
      // ctx()->exit_block();
      // do {
      //     consume(TK("except"));
      //     if(match(TK("@id"))){
      //         int name_idx = ctx()->add_name(prev().str(), NAME_SPECIAL);
      //         emit(OP_EXCEPTION_MATCH, name_idx);
      //     }else{
      //         emit(OP_LOAD_TRUE);
      //     }
      //     int patch = emit(OP_POP_JUMP_IF_FALSE);
      //     emit(OP_POP_TOP);       // pop the exception on match
      //     compile_block_body();
      //     patches.push_back(emit(OP_JUMP_ABSOLUTE));
      //     patch_jump(patch);
      // }while(curr().type == TK("except"));
      // emit(OP_RE_RAISE);      // no match, re-raise
      // for (int patch : patches) patch_jump(patch);
  }
  528. void compile_decorated(){
  529. EXPR(false);
  530. // TODO: support multiple decorator
  531. // use a while loop to consume '@'
  532. if(!match_newlines_repl()) SyntaxError();
  533. ctx()->emit(OP_SETUP_DECORATOR, BC_NOARG, prev().line);
  534. consume(TK("def"));
  535. compile_function();
  536. }
  537. bool try_compile_assignment(){
  538. Expr* lhs_p = ctx()->s_expr.top().get();
  539. bool inplace;
  540. switch (curr().type) {
  541. case TK("+="): case TK("-="): case TK("*="): case TK("/="): case TK("//="): case TK("%="):
  542. case TK("<<="): case TK(">>="): case TK("&="): case TK("|="): case TK("^="): {
  543. inplace = true;
  544. advance();
  545. auto e = make_expr<BinaryExpr>();
  546. e->op = prev().type - 1; // -1 to remove =
  547. e->lhs = ctx()->s_expr.popx();
  548. EXPR_TUPLE();
  549. e->rhs = ctx()->s_expr.popx();
  550. ctx()->s_expr.push(std::move(e));
  551. } break;
  552. case TK("="):
  553. inplace = false;
  554. advance();
  555. EXPR_TUPLE();
  556. break;
  557. default: return false;
  558. }
  559. Expr_ rhs = ctx()->s_expr.popx();
  560. rhs->emit(ctx());
  561. bool ok = lhs_p->emit_store(ctx());
  562. if(!ok) SyntaxError();
  563. if(!inplace) ctx()->s_expr.pop();
  564. return true;
  565. }
  566. void compile_stmt() {
  567. advance();
  568. int kw_line = prev().line; // backup line number
  569. switch(prev().type){
  570. case TK("break"):
  571. if (!ctx()->is_curr_block_loop()) SyntaxError("'break' outside loop");
  572. ctx()->emit(OP_LOOP_BREAK, BC_NOARG, kw_line);
  573. consume_end_stmt();
  574. break;
  575. case TK("continue"):
  576. if (!ctx()->is_curr_block_loop()) SyntaxError("'continue' not properly in loop");
  577. ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, kw_line);
  578. consume_end_stmt();
  579. break;
  580. case TK("yield"):
  581. if (contexts.size() <= 1) SyntaxError("'yield' outside function");
  582. EXPR_TUPLE(true);
  583. // if yield present, mark the function as generator
  584. ctx()->co->is_generator = true;
  585. ctx()->emit(OP_YIELD_VALUE, BC_NOARG, kw_line);
  586. consume_end_stmt();
  587. break;
  588. case TK("return"):
  589. if (contexts.size() <= 1) SyntaxError("'return' outside function");
  590. if(match_end_stmt()){
  591. ctx()->emit(OP_LOAD_NONE, BC_NOARG, kw_line);
  592. }else{
  593. EXPR_TUPLE(true);
  594. consume_end_stmt();
  595. }
  596. ctx()->emit(OP_RETURN_VALUE, BC_NOARG, kw_line);
  597. break;
  598. /*************************************************/
  599. case TK("if"): compile_if_stmt(); break;
  600. case TK("while"): compile_while_loop(); break;
  601. case TK("for"): compile_for_loop(); break;
  602. case TK("import"): compile_normal_import(); break;
  603. case TK("from"): compile_from_import(); break;
  604. case TK("def"): compile_function(); break;
  605. case TK("@"): compile_decorated(); break;
  606. case TK("try"): compile_try_except(); break;
  607. case TK("pass"): consume_end_stmt(); break;
  608. /*************************************************/
  609. case TK("assert"):
  610. EXPR_TUPLE(true);
  611. // TODO: change OP_ASSERT impl in ceval.h
  612. ctx()->emit(OP_ASSERT, BC_NOARG, kw_line);
  613. consume_end_stmt();
  614. break;
  615. case TK("global"):
  616. do {
  617. consume(TK("@id"));
  618. ctx()->co->global_names.insert(prev().str());
  619. } while (match(TK(",")));
  620. consume_end_stmt();
  621. break;
  622. case TK("raise"): {
  623. consume(TK("@id"));
  624. int dummy_t = ctx()->add_name(prev().str());
  625. if(match(TK("(")) && !match(TK(")"))){
  626. EXPR(false); consume(TK(")"));
  627. }else{
  628. ctx()->emit(OP_LOAD_NONE, BC_NOARG, BC_KEEPLINE);
  629. }
  630. ctx()->emit(OP_RAISE, dummy_t, kw_line);
  631. consume_end_stmt();
  632. } break;
  633. case TK("del"): {
  634. EXPR_TUPLE();
  635. Expr_ e = ctx()->s_expr.popx();
  636. bool ok = e->emit_del(ctx());
  637. if(!ok) SyntaxError();
  638. consume_end_stmt();
  639. } break;
  640. case TK("with"): {
  641. // TODO: reimpl this
  642. UNREACHABLE();
  643. // EXPR(false);
  644. // consume(TK("as"));
  645. // consume(TK("@id"));
  646. // int index = ctx()->add_name(prev().str(), name_scope());
  647. // emit(OP_STORE_NAME, index);
  648. // emit(OP_LOAD_NAME_REF, index);
  649. // emit(OP_WITH_ENTER);
  650. // compile_block_body();
  651. // emit(OP_LOAD_NAME_REF, index);
  652. // emit(OP_WITH_EXIT);
  653. } break;
  654. /*************************************************/
  655. // TODO: refactor goto/label use special $ syntax
  656. case TK("label"): {
  657. if(mode()!=EXEC_MODE) SyntaxError("'label' is only available in EXEC_MODE");
  658. consume(TK(".")); consume(TK("@id"));
  659. bool ok = ctx()->add_label(prev().str());
  660. if(!ok) SyntaxError("label " + prev().str().escape(true) + " already exists");
  661. consume_end_stmt();
  662. } break;
  663. case TK("goto"):
  664. if(mode()!=EXEC_MODE) SyntaxError("'goto' is only available in EXEC_MODE");
  665. consume(TK(".")); consume(TK("@id"));
  666. ctx()->emit(OP_GOTO, ctx()->add_name(prev().str()), prev().line);
  667. consume_end_stmt();
  668. break;
  669. /*************************************************/
  670. // handle dangling expression or assignment
  671. default: {
  672. advance(-1); // do revert since we have pre-called advance() at the beginning
  673. EXPR_TUPLE();
  674. if(!try_compile_assignment()){
  675. ctx()->emit_expr();
  676. if(mode()==REPL_MODE && name_scope()==NAME_GLOBAL){
  677. ctx()->emit(OP_PRINT_EXPR, BC_NOARG, BC_KEEPLINE);
  678. }else{
  679. ctx()->emit(OP_POP_TOP, BC_NOARG, BC_KEEPLINE);
  680. }
  681. }
  682. consume_end_stmt();
  683. }
  684. }
  685. }
  686. // PASS
  687. void compile_class(){
  688. consume(TK("@id"));
  689. int namei = ctx()->add_name(prev().str());
  690. int super_namei = -1;
  691. if(match(TK("(")) && match(TK("@id"))){
  692. super_namei = ctx()->add_name(prev().str());
  693. consume(TK(")"));
  694. }
  695. if(super_namei == -1) ctx()->emit(OP_LOAD_NONE, BC_NOARG, prev().line);
  696. else ctx()->emit(OP_LOAD_NAME, super_namei, prev().line);
  697. ctx()->emit(OP_BEGIN_CLASS, namei, BC_KEEPLINE);
  698. ctx()->is_compiling_class = true;
  699. compile_block_body();
  700. ctx()->is_compiling_class = false;
  701. ctx()->emit(OP_END_CLASS, BC_NOARG, BC_KEEPLINE);
  702. }
  703. void _compile_f_args(FunctionDecl& func, bool enable_type_hints){
  704. int state = 0; // 0 for args, 1 for *args, 2 for k=v, 3 for **kwargs
  705. do {
  706. if(state == 3) SyntaxError("**kwargs should be the last argument");
  707. match_newlines();
  708. if(match(TK("*"))){
  709. if(state < 1) state = 1;
  710. else SyntaxError("*args should be placed before **kwargs");
  711. }
  712. else if(match(TK("**"))){
  713. state = 3;
  714. }
  715. consume(TK("@id"));
  716. const Str& name = prev().str();
  717. if(func.has_name(name)) SyntaxError("duplicate argument name");
  718. // eat type hints
  719. if(enable_type_hints && match(TK(":"))) consume(TK("@id"));
  720. if(state == 0 && curr().type == TK("=")) state = 2;
  721. switch (state)
  722. {
  723. case 0: func.args.push_back(name); break;
  724. case 1: func.starred_arg = name; state+=1; break;
  725. case 2: {
  726. consume(TK("="));
  727. PyObject* value = read_literal();
  728. if(value == nullptr){
  729. SyntaxError(Str("expect a literal, not ") + TK_STR(curr().type));
  730. }
  731. func.kwargs.set(name, value);
  732. func.kwargs_order.push_back(name);
  733. } break;
  734. case 3: SyntaxError("**kwargs is not supported yet"); break;
  735. }
  736. } while (match(TK(",")));
  737. }
  738. void compile_function(){
  739. // TODO: bug, if there are multiple decorators, will cause error
  740. FunctionDecl func;
  741. StrName obj_name;
  742. consume(TK("@id"));
  743. func.name = prev().str();
  744. if(!ctx()->is_compiling_class && match(TK("::"))){
  745. consume(TK("@id"));
  746. obj_name = func.name;
  747. func.name = prev().str();
  748. }
  749. consume(TK("("));
  750. if (!match(TK(")"))) {
  751. _compile_f_args(func, true);
  752. consume(TK(")"));
  753. }
  754. if(match(TK("->"))){
  755. if(!match(TK("None"))) consume(TK("@id"));
  756. }
  757. func.code = push_context(lexer->src, func.name.str());
  758. compile_block_body();
  759. pop_context();
  760. ctx()->emit(OP_LOAD_FUNCTION, ctx()->add_func_decl(func), prev().line);
  761. if(!ctx()->is_compiling_class){
  762. if(obj_name.empty()){
  763. auto e = make_expr<NameExpr>(func.name, name_scope());
  764. e->emit_store(ctx());
  765. } else {
  766. ctx()->emit(OP_LOAD_NAME, ctx()->add_name(obj_name), prev().line);
  767. int index = ctx()->add_name(func.name);
  768. ctx()->emit(OP_STORE_ATTR, index, prev().line);
  769. }
  770. }else{
  771. ctx()->emit(OP_STORE_CLASS_ATTR, ctx()->add_name(func.name), BC_KEEPLINE);
  772. }
  773. }
  774. PyObject* read_literal(){
  775. advance();
  776. switch(prev().type){
  777. case TK("-"): {
  778. consume(TK("@num"));
  779. PyObject* val = LiteralExpr(prev().value).to_object(ctx());
  780. return vm->num_negated(val);
  781. }
  782. case TK("@num"): return LiteralExpr(prev().value).to_object(ctx());
  783. case TK("@str"): return LiteralExpr(prev().value).to_object(ctx());
  784. case TK("True"): return VAR(true);
  785. case TK("False"): return VAR(false);
  786. case TK("None"): return vm->None;
  787. case TK("..."): return vm->Ellipsis;
  788. default: break;
  789. }
  790. return nullptr;
  791. }
  792. void SyntaxError(Str msg){ lexer->throw_err("SyntaxError", msg, curr().line, curr().start); }
  793. void SyntaxError(){ lexer->throw_err("SyntaxError", "invalid syntax", curr().line, curr().start); }
  794. void IndentationError(Str msg){ lexer->throw_err("IndentationError", msg, curr().line, curr().start); }
  795. public:
  796. Compiler(VM* vm, const char* source, Str filename, CompileMode mode){
  797. this->vm = vm;
  798. this->used = false;
  799. this->lexer = std::make_unique<Lexer>(
  800. make_sp<SourceData>(source, filename, mode)
  801. );
  802. // TODO: check if already initialized
  803. init_pratt_rules();
  804. }
  805. CodeObject_ compile(){
  806. if(used) UNREACHABLE();
  807. used = true;
  808. tokens = lexer->run();
  809. // if(lexer->src->filename == "<stdin>"){
  810. // for(auto& t: tokens) std::cout << t.info() << std::endl;
  811. // }
  812. CodeObject_ code = push_context(lexer->src, lexer->src->filename);
  813. advance(); // skip @sof, so prev() is always valid
  814. match_newlines(); // skip possible leading '\n'
  815. if(mode()==EVAL_MODE) {
  816. EXPR_TUPLE();
  817. consume(TK("@eof"));
  818. ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  819. pop_context();
  820. return code;
  821. }else if(mode()==JSON_MODE){
  822. PyObject* value = read_literal();
  823. if(value != nullptr) ctx()->emit(OP_LOAD_CONST, ctx()->add_const(value), prev().line);
  824. else if(match(TK("{"))) exprMap();
  825. else if(match(TK("["))) exprList();
  826. else SyntaxError("expect a JSON object or array");
  827. consume(TK("@eof"));
  828. ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  829. pop_context();
  830. return code;
  831. }
  832. while (!match(TK("@eof"))) {
  833. if (match(TK("class"))) {
  834. compile_class();
  835. } else {
  836. compile_stmt();
  837. }
  838. match_newlines();
  839. }
  840. pop_context();
  841. return code;
  842. }
  843. };
  844. } // namespace pkpy