compiler.h 34 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917
  1. #pragma once
  2. #include "expr.h"
  3. namespace pkpy{
  4. class Compiler;
  5. typedef void (Compiler::*PrattCallback)();
  6. struct PrattRule{
  7. PrattCallback prefix;
  8. PrattCallback infix;
  9. Precedence precedence;
  10. };
  11. class Compiler {
  12. inline static PrattRule rules[kTokenCount];
  13. std::unique_ptr<Lexer> lexer;
  14. stack<CodeEmitContext> contexts;
  15. VM* vm;
  16. bool used;
  17. // for parsing token stream
  18. int i = 0;
  19. std::vector<Token> tokens;
  20. const Token& prev() { return tokens.at(i-1); }
  21. const Token& curr() { return tokens.at(i); }
  22. const Token& next() { return tokens.at(i+1); }
  23. void advance() { i++; }
  24. CodeEmitContext* ctx() { return &contexts.top(); }
  25. CompileMode mode() const{ return lexer->src->mode; }
  26. NameScope name_scope() const { return contexts.size()>1 ? NAME_LOCAL : NAME_GLOBAL; }
  27. template<typename... Args>
  28. CodeObject_ push_context(Args&&... args){
  29. CodeObject_ co = make_sp<CodeObject>(std::forward<Args>(args)...);
  30. contexts.push(CodeEmitContext(vm, co));
  31. return co;
  32. }
  33. void pop_context(){
  34. if(!ctx()->s_expr.empty()) UNREACHABLE();
  35. // if the last op does not return, add a default return None
  36. if(ctx()->co->codes.back().op != OP_RETURN_VALUE){
  37. ctx()->emit(OP_LOAD_NONE, BC_NOARG, BC_KEEPLINE);
  38. ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  39. }
  40. ctx()->co->optimize(vm);
  41. contexts.pop();
  42. }
  43. static void init_pratt_rules(){
  44. // http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
  45. #define METHOD(name) &Compiler::name
  46. #define NO_INFIX nullptr, PREC_NONE
  47. for(TokenIndex i=0; i<kTokenCount; i++) rules[i] = { nullptr, NO_INFIX };
  48. rules[TK(".")] = { nullptr, METHOD(exprAttrib), PREC_ATTRIB };
  49. rules[TK("(")] = { METHOD(exprGroup), METHOD(exprCall), PREC_CALL };
  50. rules[TK("[")] = { METHOD(exprList), METHOD(exprSubscr), PREC_SUBSCRIPT };
  51. rules[TK("{")] = { METHOD(exprMap), NO_INFIX };
  52. rules[TK("%")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  53. rules[TK("+")] = { nullptr, METHOD(exprBinaryOp), PREC_TERM };
  54. rules[TK("-")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_TERM };
  55. rules[TK("*")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_FACTOR };
  56. rules[TK("/")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  57. rules[TK("//")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  58. rules[TK("**")] = { nullptr, METHOD(exprBinaryOp), PREC_EXPONENT };
  59. rules[TK(">")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  60. rules[TK("<")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  61. rules[TK("==")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY };
  62. rules[TK("!=")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY };
  63. rules[TK(">=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  64. rules[TK("<=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  65. rules[TK("in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  66. rules[TK("is")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  67. rules[TK("<<")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  68. rules[TK(">>")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  69. rules[TK("&")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_AND };
  70. rules[TK("|")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_OR };
  71. rules[TK("^")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_XOR };
  72. rules[TK("?")] = { nullptr, METHOD(exprTernary), PREC_TERNARY };
  73. rules[TK(",")] = { nullptr, METHOD(exprTuple), PREC_TUPLE };
  74. rules[TK("not in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  75. rules[TK("is not")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  76. rules[TK("and") ] = { nullptr, METHOD(exprAnd), PREC_LOGICAL_AND };
  77. rules[TK("or")] = { nullptr, METHOD(exprOr), PREC_LOGICAL_OR };
  78. rules[TK("not")] = { METHOD(exprNot), nullptr, PREC_LOGICAL_NOT };
  79. rules[TK("True")] = { METHOD(exprLiteral0), NO_INFIX };
  80. rules[TK("False")] = { METHOD(exprLiteral0), NO_INFIX };
  81. rules[TK("None")] = { METHOD(exprLiteral0), NO_INFIX };
  82. rules[TK("...")] = { METHOD(exprLiteral0), NO_INFIX };
  83. rules[TK("lambda")] = { METHOD(exprLambda), NO_INFIX };
  84. rules[TK("@id")] = { METHOD(exprName), NO_INFIX };
  85. rules[TK("@num")] = { METHOD(exprLiteral), NO_INFIX };
  86. rules[TK("@str")] = { METHOD(exprLiteral), NO_INFIX };
  87. rules[TK("@fstr")] = { METHOD(exprFString), NO_INFIX };
  88. #undef METHOD
  89. #undef NO_INFIX
  90. }
  91. bool match(TokenIndex expected) {
  92. if (curr().type != expected) return false;
  93. advance();
  94. return true;
  95. }
  96. void consume(TokenIndex expected) {
  97. if (!match(expected)){
  98. StrStream ss;
  99. ss << "expected '" << TK_STR(expected) << "', but got '" << TK_STR(curr().type) << "'";
  100. SyntaxError(ss.str());
  101. }
  102. }
  103. bool match_newlines_repl(){
  104. return match_newlines(mode()==REPL_MODE);
  105. }
  106. bool match_newlines(bool repl_throw=false) {
  107. bool consumed = false;
  108. if (curr().type == TK("@eol")) {
  109. while (curr().type == TK("@eol")) advance();
  110. consumed = true;
  111. }
  112. if (repl_throw && curr().type == TK("@eof")){
  113. throw NeedMoreLines(ctx()->is_compiling_class);
  114. }
  115. return consumed;
  116. }
  117. bool match_end_stmt() {
  118. if (match(TK(";"))) { match_newlines(); return true; }
  119. if (match_newlines() || curr().type == TK("@eof")) return true;
  120. if (curr().type == TK("@dedent")) return true;
  121. return false;
  122. }
  123. void consume_end_stmt() {
  124. if (!match_end_stmt()) SyntaxError("expected statement end");
  125. }
  126. /*************************************************/
  127. void EXPR(bool push_stack=true) {
  128. parse_expression(PREC_TUPLE+1, push_stack);
  129. }
  130. void EXPR_TUPLE(bool push_stack=true) {
  131. parse_expression(PREC_TUPLE, push_stack);
  132. }
  133. template <typename T, typename... Args>
  134. std::unique_ptr<T> make_expr(Args&&... args) {
  135. std::unique_ptr<T> expr = std::make_unique<T>(std::forward<Args>(args)...);
  136. expr->line = prev().line;
  137. return expr;
  138. }
  139. // PASS
  140. void exprLiteral(){
  141. ctx()->s_expr.push(make_expr<LiteralExpr>(prev().value));
  142. }
  143. // PASS
  144. void exprFString(){
  145. ctx()->s_expr.push(make_expr<FStringExpr>(std::get<Str>(prev().value)));
  146. }
  147. // PASS
  148. void exprLambda(){
  149. auto e = make_expr<LambdaExpr>();
  150. e->func.name = "<lambda>";
  151. e->scope = name_scope();
  152. if(!match(TK(":"))){
  153. _compile_f_args(e->func, false);
  154. consume(TK(":"));
  155. }
  156. e->func.code = push_context(lexer->src, "<lambda>");
  157. EXPR(false); // https://github.com/blueloveTH/pocketpy/issues/37
  158. ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  159. pop_context();
  160. ctx()->s_expr.push(std::move(e));
  161. }
  162. // PASS
  163. void exprTuple(){
  164. auto e = make_expr<TupleExpr>();
  165. do {
  166. EXPR(); // NOTE: "1," will fail, "1,2" will be ok
  167. e->items.push_back(ctx()->s_expr.popx());
  168. } while(match(TK(",")));
  169. ctx()->s_expr.push(std::move(e));
  170. }
  171. // PASS
  172. void exprOr(){
  173. auto e = make_expr<OrExpr>();
  174. e->lhs = ctx()->s_expr.popx();
  175. parse_expression(PREC_LOGICAL_OR + 1);
  176. e->rhs = ctx()->s_expr.popx();
  177. ctx()->s_expr.push(std::move(e));
  178. }
  179. // PASS
  180. void exprAnd(){
  181. auto e = make_expr<AndExpr>();
  182. e->lhs = ctx()->s_expr.popx();
  183. parse_expression(PREC_LOGICAL_AND + 1);
  184. e->rhs = ctx()->s_expr.popx();
  185. ctx()->s_expr.push(std::move(e));
  186. }
  187. // PASS
  188. void exprTernary(){
  189. auto e = make_expr<TernaryExpr>();
  190. e->cond = ctx()->s_expr.popx();
  191. EXPR(); // if true
  192. e->true_expr = ctx()->s_expr.popx();
  193. consume(TK(":"));
  194. EXPR(); // if false
  195. e->false_expr = ctx()->s_expr.popx();
  196. ctx()->s_expr.push(std::move(e));
  197. }
  198. // PASS
  199. void exprBinaryOp(){
  200. auto e = make_expr<BinaryExpr>();
  201. e->op = prev().type;
  202. e->lhs = ctx()->s_expr.popx();
  203. parse_expression(rules[e->op].precedence + 1);
  204. e->rhs = ctx()->s_expr.popx();
  205. ctx()->s_expr.push(std::move(e));
  206. }
  207. // PASS
  208. void exprNot() {
  209. parse_expression(PREC_LOGICAL_NOT + 1);
  210. ctx()->s_expr.push(make_expr<NotExpr>(ctx()->s_expr.popx()));
  211. }
  212. // PASS
  213. void exprUnaryOp(){
  214. TokenIndex op = prev().type;
  215. parse_expression(PREC_UNARY + 1);
  216. switch(op){
  217. case TK("-"):
  218. ctx()->s_expr.push(make_expr<NegatedExpr>(ctx()->s_expr.popx()));
  219. break;
  220. case TK("*"):
  221. ctx()->s_expr.push(make_expr<StarredExpr>(ctx()->s_expr.popx()));
  222. break;
  223. default: UNREACHABLE();
  224. }
  225. }
  226. // PASS
  227. void exprGroup(){
  228. match_newlines_repl();
  229. EXPR_TUPLE(); // () is just for change precedence
  230. match_newlines_repl();
  231. consume(TK(")"));
  232. }
  233. // PASS
  234. template<typename T>
  235. void _consume_comp(Expr_ expr){
  236. static_assert(std::is_base_of<CompExpr, T>::value);
  237. std::unique_ptr<CompExpr> ce = std::make_unique<T>();
  238. ce->expr = std::move(expr);
  239. EXPR_TUPLE(); // must be a lvalue
  240. ce->vars = ctx()->s_expr.popx();
  241. consume(TK("in"));
  242. EXPR();
  243. ce->iter = ctx()->s_expr.popx();
  244. match_newlines_repl();
  245. if(match(TK("if"))){
  246. EXPR();
  247. ce->cond = ctx()->s_expr.popx();
  248. }
  249. ctx()->s_expr.push(std::move(ce));
  250. match_newlines_repl();
  251. }
  252. // PASS
  253. void exprList() {
  254. int line = prev().line;
  255. std::vector<Expr_> items;
  256. do {
  257. match_newlines_repl();
  258. if (curr().type == TK("]")) break;
  259. EXPR();
  260. items.push_back(ctx()->s_expr.popx());
  261. match_newlines_repl();
  262. if(items.size()==1 && match(TK("for"))){
  263. _consume_comp<ListCompExpr>(std::move(items[0]));
  264. consume(TK("]"));
  265. return;
  266. }
  267. match_newlines_repl();
  268. } while (match(TK(",")));
  269. consume(TK("]"));
  270. auto e = make_expr<ListExpr>(std::move(items));
  271. e->line = line; // override line
  272. ctx()->s_expr.push(std::move(e));
  273. }
  274. // PASS
  275. void exprMap() {
  276. bool parsing_dict = false; // {...} may be dict or set
  277. std::vector<Expr_> items;
  278. do {
  279. match_newlines_repl();
  280. if (curr().type == TK("}")) break;
  281. EXPR();
  282. if(curr().type == TK(":")) parsing_dict = true;
  283. if(parsing_dict){
  284. consume(TK(":"));
  285. EXPR();
  286. auto dict_item = make_expr<DictItemExpr>();
  287. dict_item->key = ctx()->s_expr.popx();
  288. dict_item->value = ctx()->s_expr.popx();
  289. items.push_back(std::move(dict_item));
  290. }else{
  291. items.push_back(ctx()->s_expr.popx());
  292. }
  293. match_newlines_repl();
  294. if(items.size()==1 && match(TK("for"))){
  295. if(parsing_dict) _consume_comp<DictCompExpr>(std::move(items[0]));
  296. else _consume_comp<SetCompExpr>(std::move(items[0]));
  297. consume(TK("}"));
  298. return;
  299. }
  300. match_newlines_repl();
  301. } while (match(TK(",")));
  302. consume(TK("}"));
  303. if(items.size()==0 || parsing_dict){
  304. auto e = make_expr<DictExpr>(std::move(items));
  305. ctx()->s_expr.push(std::move(e));
  306. }else{
  307. auto e = make_expr<SetExpr>(std::move(items));
  308. ctx()->s_expr.push(std::move(e));
  309. }
  310. }
  311. // PASS
  312. void exprCall() {
  313. auto e = make_expr<CallExpr>();
  314. e->callable = ctx()->s_expr.popx();
  315. do {
  316. match_newlines_repl();
  317. if (curr().type==TK(")")) break;
  318. if(curr().type==TK("@id") && next().type==TK("=")) {
  319. consume(TK("@id"));
  320. Str key = prev().str();
  321. consume(TK("="));
  322. EXPR();
  323. e->kwargs.push_back({key, ctx()->s_expr.popx()});
  324. } else{
  325. if(!e->kwargs.empty()) SyntaxError("positional argument follows keyword argument");
  326. EXPR();
  327. e->args.push_back(ctx()->s_expr.popx());
  328. }
  329. match_newlines_repl();
  330. } while (match(TK(",")));
  331. consume(TK(")"));
  332. if(e->args.size() > 32767) SyntaxError("too many positional arguments");
  333. if(e->kwargs.size() > 32767) SyntaxError("too many keyword arguments");
  334. ctx()->s_expr.push(std::move(e));
  335. }
  336. // PASS
  337. void exprName(){
  338. ctx()->s_expr.push(make_expr<NameExpr>(prev().str(), name_scope()));
  339. }
  340. // PASS
  341. void exprAttrib() {
  342. consume(TK("@id"));
  343. ctx()->s_expr.push(
  344. make_expr<AttribExpr>(ctx()->s_expr.popx(), prev().str())
  345. );
  346. }
  347. // PASS
  348. void exprSubscr() {
  349. auto e = make_expr<SubscrExpr>();
  350. std::vector<Expr_> items;
  351. do {
  352. EXPR_TUPLE();
  353. items.push_back(ctx()->s_expr.popx());
  354. } while(match(TK(":")));
  355. consume(TK("]"));
  356. switch(items.size()){
  357. case 1:
  358. e->b = std::move(items[0]);
  359. break;
  360. case 2: case 3: {
  361. auto slice = make_expr<SliceExpr>();
  362. slice->start = std::move(items[0]);
  363. slice->stop = std::move(items[1]);
  364. if(items.size()==3){
  365. slice->step = std::move(items[2]);
  366. }
  367. e->b = std::move(slice);
  368. } break;
  369. default: SyntaxError(); break;
  370. }
  371. ctx()->s_expr.push(std::move(e));
  372. }
  373. // PASS
  374. void exprLiteral0() {
  375. ctx()->s_expr.push(make_expr<Literal0Expr>(prev().type));
  376. }
  377. void compile_block_body() {
  378. consume(TK(":"));
  379. if(curr().type!=TK("@eol") && curr().type!=TK("@eof")){
  380. compile_stmt(); // inline block
  381. return;
  382. }
  383. if(!match_newlines(mode()==REPL_MODE)){
  384. SyntaxError("expected a new line after ':'");
  385. }
  386. consume(TK("@indent"));
  387. while (curr().type != TK("@dedent")) {
  388. match_newlines();
  389. compile_stmt();
  390. match_newlines();
  391. }
  392. consume(TK("@dedent"));
  393. }
  394. Str _compile_import() {
  395. consume(TK("@id"));
  396. Str name = prev().str();
  397. int index = ctx()->add_name(name);
  398. ctx()->emit(OP_IMPORT_NAME, index, prev().line);
  399. return name;
  400. }
  401. // import a as b
  402. void compile_normal_import() {
  403. do {
  404. Str name = _compile_import();
  405. if (match(TK("as"))) {
  406. consume(TK("@id"));
  407. name = prev().str();
  408. }
  409. int index = ctx()->add_name(name);
  410. auto op = name_scope()==NAME_LOCAL ? OP_STORE_LOCAL : OP_STORE_GLOBAL;
  411. ctx()->emit(op, index, prev().line);
  412. } while (match(TK(",")));
  413. consume_end_stmt();
  414. }
  415. // from a import b as c, d as e
  416. void compile_from_import() {
  417. _compile_import();
  418. consume(TK("import"));
  419. if (match(TK("*"))) {
  420. if(name_scope() != NAME_GLOBAL) SyntaxError("import * should be used in global scope");
  421. ctx()->emit(OP_IMPORT_STAR, BC_NOARG, prev().line);
  422. consume_end_stmt();
  423. return;
  424. }
  425. do {
  426. ctx()->emit(OP_DUP_TOP, BC_NOARG, BC_KEEPLINE);
  427. consume(TK("@id"));
  428. Str name = prev().str();
  429. int index = ctx()->add_name(name);
  430. ctx()->emit(OP_LOAD_ATTR, index, prev().line);
  431. if (match(TK("as"))) {
  432. consume(TK("@id"));
  433. name = prev().str();
  434. }
  435. index = ctx()->add_name(name);
  436. auto op = name_scope()==NAME_LOCAL ? OP_STORE_LOCAL : OP_STORE_GLOBAL;
  437. ctx()->emit(op, index, prev().line);
  438. } while (match(TK(",")));
  439. ctx()->emit(OP_POP_TOP, BC_NOARG, BC_KEEPLINE);
  440. consume_end_stmt();
  441. }
  442. void parse_expression(int precedence, bool push_stack=true) {
  443. advance();
  444. PrattCallback prefix = rules[prev().type].prefix;
  445. if (prefix == nullptr) SyntaxError(Str("expected an expression, but got ") + TK_STR(prev().type));
  446. (this->*prefix)();
  447. while (rules[curr().type].precedence >= precedence) {
  448. TokenIndex op = curr().type;
  449. advance();
  450. PrattCallback infix = rules[op].infix;
  451. if(infix == nullptr) throw std::runtime_error("(infix == nullptr) is true");
  452. (this->*infix)();
  453. }
  454. if(!push_stack) ctx()->emit_expr();
  455. }
  456. // PASS
  457. void compile_if_stmt() {
  458. EXPR(false); // condition
  459. int patch = ctx()->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, prev().line);
  460. compile_block_body();
  461. if (match(TK("elif"))) {
  462. int exit_patch = ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, prev().line);
  463. ctx()->patch_jump(patch);
  464. compile_if_stmt();
  465. ctx()->patch_jump(exit_patch);
  466. } else if (match(TK("else"))) {
  467. int exit_patch = ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, prev().line);
  468. ctx()->patch_jump(patch);
  469. compile_block_body();
  470. ctx()->patch_jump(exit_patch);
  471. } else {
  472. ctx()->patch_jump(patch);
  473. }
  474. }
  475. // PASS
  476. void compile_while_loop() {
  477. ctx()->enter_block(WHILE_LOOP);
  478. EXPR(false); // condition
  479. int patch = ctx()->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, prev().line);
  480. compile_block_body();
  481. ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, BC_KEEPLINE);
  482. ctx()->patch_jump(patch);
  483. ctx()->exit_block();
  484. }
  485. // PASS
  486. void compile_for_loop() {
  487. EXPR_TUPLE();
  488. Expr_ vars = ctx()->s_expr.popx();
  489. consume(TK("in"));
  490. EXPR(false);
  491. ctx()->emit(OP_GET_ITER, BC_NOARG, BC_KEEPLINE);
  492. ctx()->enter_block(FOR_LOOP);
  493. ctx()->emit(OP_FOR_ITER, BC_NOARG, BC_KEEPLINE);
  494. bool ok = vars->emit_store(ctx());
  495. if(!ok) SyntaxError(); // this error occurs in `vars` instead of this line, but...nevermind
  496. compile_block_body();
  497. ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, BC_KEEPLINE);
  498. ctx()->exit_block();
  499. }
  500. void compile_try_except() {
  501. // ctx()->enter_block(TRY_EXCEPT);
  502. // ctx()->emit(OP_TRY_BLOCK_ENTER, BC_NOARG, prev().line);
  503. // compile_block_body();
  504. // ctx()->emit(OP_TRY_BLOCK_EXIT, BC_NOARG, BC_KEEPLINE);
  505. // std::vector<int> patches = {
  506. // ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, BC_KEEPLINE)
  507. // };
  508. // ctx()->exit_block();
  509. // do {
  510. // consume(TK("except"));
  511. // if(match(TK("@id"))){
  512. // int name_idx = ctx()->add_name(prev().str(), NAME_SPECIAL);
  513. // emit(OP_EXCEPTION_MATCH, name_idx);
  514. // }else{
  515. // emit(OP_LOAD_TRUE);
  516. // }
  517. // int patch = emit(OP_POP_JUMP_IF_FALSE);
  518. // emit(OP_POP_TOP); // pop the exception on match
  519. // compile_block_body();
  520. // patches.push_back(emit(OP_JUMP_ABSOLUTE));
  521. // patch_jump(patch);
  522. // }while(curr().type == TK("except"));
  523. // emit(OP_RE_RAISE); // no match, re-raise
  524. // for (int patch : patches) patch_jump(patch);
  525. }
  526. void compile_decorated(){
  527. EXPR(false);
  528. if(!match_newlines(mode()==REPL_MODE)) SyntaxError();
  529. ctx()->emit(OP_SETUP_DECORATOR, BC_NOARG, prev().line);
  530. consume(TK("def"));
  531. compile_function();
  532. }
  533. bool try_compile_assignment(){
  534. Expr_ lhs = ctx()->s_expr.popx();
  535. switch (curr().type) {
  536. // case TK("+="): lhs->emit(ctx()); advance(); emit(OP_BINARY_OP, 0); break;
  537. // case TK("-="): lhs->emit(ctx()); advance(); emit(OP_BINARY_OP, 1); break;
  538. // case TK("*="): lhs->emit(ctx()); advance(); emit(OP_BINARY_OP, 2); break;
  539. // case TK("/="): lhs->emit(ctx()); advance(); emit(OP_BINARY_OP, 3); break;
  540. // case TK("//="): lhs->emit(ctx()); advance(); emit(OP_BINARY_OP, 4); break;
  541. // case TK("%="): lhs->emit(ctx()); advance(); emit(OP_BINARY_OP, 5); break;
  542. // case TK("<<="): lhs->emit(ctx()); advance(); emit(OP_BITWISE_OP, 0); break;
  543. // case TK(">>="): lhs->emit(ctx()); advance(); emit(OP_BITWISE_OP, 1); break;
  544. // case TK("&="): lhs->emit(ctx()); advance(); emit(OP_BITWISE_OP, 2); break;
  545. // case TK("|="): lhs->emit(ctx()); advance(); emit(OP_BITWISE_OP, 3); break;
  546. // case TK("^="): lhs->emit(ctx()); advance(); emit(OP_BITWISE_OP, 4); break;
  547. // case TK("="): advance(); break;
  548. case TK("+="): case TK("-="): case TK("*="): case TK("/="): case TK("//="): case TK("%="):
  549. case TK("<<="): case TK(">>="): case TK("&="): case TK("|="): case TK("^="): {
  550. advance();
  551. auto e = make_expr<BinaryExpr>();
  552. e->op = prev().type;
  553. e->lhs = lhs; // here should be a copy
  554. EXPR_TUPLE();
  555. e->rhs = ctx()->s_expr.popx();
  556. // ...
  557. } break;
  558. case TK("="): advance(); break;
  559. default: return false;
  560. }
  561. if(prev().type == TK("=")){
  562. EXPR_TUPLE();
  563. Expr_ rhs = ctx()->s_expr.popx();
  564. // do assign here
  565. // lhs = rhs
  566. return true;
  567. }
  568. return true;
  569. }
  570. void compile_stmt() {
  571. advance();
  572. int kw_line = prev().line; // backup line number
  573. switch(prev().type){
  574. case TK("break"):
  575. if (!ctx()->is_curr_block_loop()) SyntaxError("'break' outside loop");
  576. ctx()->emit(OP_LOOP_BREAK, BC_NOARG, kw_line);
  577. consume_end_stmt();
  578. break;
  579. case TK("continue"):
  580. if (!ctx()->is_curr_block_loop()) SyntaxError("'continue' not properly in loop");
  581. ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, kw_line);
  582. consume_end_stmt();
  583. break;
  584. case TK("yield"):
  585. if (contexts.size() <= 1) SyntaxError("'yield' outside function");
  586. EXPR_TUPLE(true);
  587. // if yield present, mark the function as generator
  588. ctx()->co->is_generator = true;
  589. ctx()->emit(OP_YIELD_VALUE, BC_NOARG, kw_line);
  590. consume_end_stmt();
  591. break;
  592. case TK("return"):
  593. if (contexts.size() <= 1) SyntaxError("'ret urn' outside function");
  594. if(match_end_stmt()){
  595. ctx()->emit(OP_LOAD_NONE, BC_NOARG, kw_line);
  596. }else{
  597. EXPR_TUPLE(true);
  598. consume_end_stmt();
  599. }
  600. ctx()->emit(OP_RETURN_VALUE, BC_NOARG, kw_line);
  601. break;
  602. /*************************************************/
  603. case TK("if"): compile_if_stmt(); break;
  604. case TK("while"): compile_while_loop(); break;
  605. case TK("for"): compile_for_loop(); break;
  606. case TK("import"): compile_normal_import(); break;
  607. case TK("from"): compile_from_import(); break;
  608. case TK("def"): compile_function(); break;
  609. case TK("@"): compile_decorated(); break;
  610. case TK("try"): compile_try_except(); break;
  611. case TK("pass"): consume_end_stmt(); break;
  612. /*************************************************/
  613. case TK("assert"):
  614. EXPR_TUPLE(true);
  615. // TODO: change OP_ASSERT impl in ceval.h
  616. ctx()->emit(OP_ASSERT, BC_NOARG, kw_line);
  617. consume_end_stmt();
  618. break;
  619. case TK("global"):
  620. do {
  621. consume(TK("@id"));
  622. ctx()->co->global_names.insert(prev().str());
  623. } while (match(TK(",")));
  624. consume_end_stmt();
  625. break;
  626. case TK("raise"): {
  627. consume(TK("@id"));
  628. int dummy_t = ctx()->add_name(prev().str());
  629. if(match(TK("(")) && !match(TK(")"))){
  630. EXPR(false); consume(TK(")"));
  631. }else{
  632. ctx()->emit(OP_LOAD_NONE, BC_NOARG, BC_KEEPLINE);
  633. }
  634. ctx()->emit(OP_RAISE, dummy_t, kw_line);
  635. consume_end_stmt();
  636. } break;
  637. case TK("del"): {
  638. EXPR_TUPLE();
  639. Expr_ e = ctx()->s_expr.popx();
  640. bool ok = e->emit_del(ctx());
  641. if(!ok) SyntaxError();
  642. consume_end_stmt();
  643. } break;
  644. case TK("with"): {
  645. // TODO: reimpl this
  646. UNREACHABLE();
  647. // EXPR(false);
  648. // consume(TK("as"));
  649. // consume(TK("@id"));
  650. // int index = ctx()->add_name(prev().str(), name_scope());
  651. // emit(OP_STORE_NAME, index);
  652. // emit(OP_LOAD_NAME_REF, index);
  653. // emit(OP_WITH_ENTER);
  654. // compile_block_body();
  655. // emit(OP_LOAD_NAME_REF, index);
  656. // emit(OP_WITH_EXIT);
  657. } break;
  658. /*************************************************/
  659. // TODO: refactor goto/label use special $ syntax
  660. case TK("label"): {
  661. if(mode()!=EXEC_MODE) SyntaxError("'label' is only available in EXEC_MODE");
  662. consume(TK(".")); consume(TK("@id"));
  663. bool ok = ctx()->add_label(prev().str());
  664. if(!ok) SyntaxError("label " + prev().str().escape(true) + " already exists");
  665. consume_end_stmt();
  666. } break;
  667. case TK("goto"):
  668. if(mode()!=EXEC_MODE) SyntaxError("'goto' is only available in EXEC_MODE");
  669. consume(TK(".")); consume(TK("@id"));
  670. ctx()->emit(OP_GOTO, ctx()->add_name(prev().str()), prev().line);
  671. consume_end_stmt();
  672. break;
  673. /*************************************************/
  674. // handle dangling expression or assignment
  675. default: {
  676. EXPR_TUPLE();
  677. if(!try_compile_assignment()){
  678. ctx()->emit_expr();
  679. if(mode()==REPL_MODE && name_scope()==NAME_GLOBAL){
  680. ctx()->emit(OP_PRINT_EXPR, BC_NOARG, BC_KEEPLINE);
  681. }else{
  682. ctx()->emit(OP_POP_TOP, BC_NOARG, BC_KEEPLINE);
  683. }
  684. }
  685. consume_end_stmt();
  686. }
  687. }
  688. }
  689. // PASS
  690. void compile_class(){
  691. consume(TK("@id"));
  692. int namei = ctx()->add_name(prev().str());
  693. int super_namei = -1;
  694. if(match(TK("(")) && match(TK("@id"))){
  695. super_namei = ctx()->add_name(prev().str());
  696. consume(TK(")"));
  697. }
  698. if(super_namei == -1) ctx()->emit(OP_LOAD_NONE, BC_NOARG, prev().line);
  699. else ctx()->emit(OP_LOAD_NAME, super_namei, prev().line);
  700. ctx()->emit(OP_BEGIN_CLASS, namei, BC_KEEPLINE);
  701. ctx()->is_compiling_class = true;
  702. compile_block_body();
  703. ctx()->is_compiling_class = false;
  704. ctx()->emit(OP_END_CLASS, BC_NOARG, BC_KEEPLINE);
  705. }
  706. void _compile_f_args(Function& func, bool enable_type_hints){
  707. int state = 0; // 0 for args, 1 for *args, 2 for k=v, 3 for **kwargs
  708. do {
  709. if(state == 3) SyntaxError("**kwargs should be the last argument");
  710. match_newlines();
  711. if(match(TK("*"))){
  712. if(state < 1) state = 1;
  713. else SyntaxError("*args should be placed before **kwargs");
  714. }
  715. else if(match(TK("**"))){
  716. state = 3;
  717. }
  718. consume(TK("@id"));
  719. const Str& name = prev().str();
  720. if(func.has_name(name)) SyntaxError("duplicate argument name");
  721. // eat type hints
  722. if(enable_type_hints && match(TK(":"))) consume(TK("@id"));
  723. if(state == 0 && curr().type == TK("=")) state = 2;
  724. switch (state)
  725. {
  726. case 0: func.args.push_back(name); break;
  727. case 1: func.starred_arg = name; state+=1; break;
  728. case 2: {
  729. consume(TK("="));
  730. PyObject* value = read_literal();
  731. if(value == nullptr){
  732. SyntaxError(Str("expect a literal, not ") + TK_STR(curr().type));
  733. }
  734. func.kwargs.set(name, value);
  735. func.kwargs_order.push_back(name);
  736. } break;
  737. case 3: SyntaxError("**kwargs is not supported yet"); break;
  738. }
  739. } while (match(TK(",")));
  740. }
  741. void compile_function(){
  742. // TODO: bug, if there are multiple decorators, will cause error
  743. bool has_decorator = !co()->codes.empty() && co()->codes.back().op == OP_SETUP_DECORATOR;
  744. Function func;
  745. StrName obj_name;
  746. consume(TK("@id"));
  747. func.name = prev().str();
  748. if(!ctx()->is_compiling_class && match(TK("::"))){
  749. consume(TK("@id"));
  750. obj_name = func.name;
  751. func.name = prev().str();
  752. }
  753. consume(TK("("));
  754. if (!match(TK(")"))) {
  755. _compile_f_args(func, true);
  756. consume(TK(")"));
  757. }
  758. if(match(TK("->"))){
  759. if(!match(TK("None"))) consume(TK("@id"));
  760. }
  761. func.code = push_context(lexer->src, func.name.str());
  762. compile_block_body();
  763. pop_context();
  764. emit(OP_LOAD_FUNCTION, co()->add_const(VAR(func)));
  765. if(name_scope() == NAME_LOCAL) emit(OP_SETUP_CLOSURE);
  766. if(!ctx()->is_compiling_class){
  767. if(obj_name.empty()){
  768. if(has_decorator) emit(OP_CALL, 1);
  769. emit(OP_STORE_NAME, co()->add_name(func.name, name_scope()));
  770. } else {
  771. if(has_decorator) SyntaxError("decorator is not supported here");
  772. emit(OP_LOAD_NAME, co()->add_name(obj_name, name_scope()));
  773. int index = co()->add_name(func.name, NAME_ATTR);
  774. emit(OP_BUILD_ATTR_REF, index);
  775. emit(OP_ROT_TWO);
  776. emit(OP_STORE_REF);
  777. }
  778. }else{
  779. if(has_decorator) emit(OP_CALL, 1);
  780. emit(OP_STORE_CLASS_ATTR, co()->add_name(func.name, name_scope()));
  781. }
  782. }
  783. PyObject* read_literal(){
  784. if(match(TK("-"))){
  785. consume(TK("@num"));
  786. PyObject* val = get_value(prev());
  787. return vm->num_negated(val);
  788. }
  789. if(match(TK("@num"))) return get_value(prev());
  790. if(match(TK("@str"))) return get_value(prev());
  791. if(match(TK("True"))) return VAR(true);
  792. if(match(TK("False"))) return VAR(false);
  793. if(match(TK("None"))) return vm->None;
  794. if(match(TK("..."))) return vm->Ellipsis;
  795. return nullptr;
  796. }
  797. void SyntaxError(Str msg){ lexer->throw_err("SyntaxError", msg, curr().line, curr().start); }
  798. void SyntaxError(){ lexer->throw_err("SyntaxError", "invalid syntax", curr().line, curr().start); }
  799. void IndentationError(Str msg){ lexer->throw_err("IndentationError", msg, curr().line, curr().start); }
  800. public:
  801. Compiler(VM* vm, const char* source, Str filename, CompileMode mode){
  802. this->vm = vm;
  803. this->used = false;
  804. this->lexer = std::make_unique<Lexer>(
  805. make_sp<SourceData>(source, filename, mode)
  806. );
  807. if(rules.empty()) init_pratt_rules();
  808. }
  809. CodeObject_ compile(){
  810. if(used) UNREACHABLE();
  811. used = true;
  812. tokens = lexer->run();
  813. // if(lexer->src->filename == "<stdin>"){
  814. // for(auto& t: tokens) std::cout << t.info() << std::endl;
  815. // }
  816. CodeObject_ code = push_context(lexer->src, lexer->src->filename);
  817. advance(); // skip @sof, so prev() is always valid
  818. match_newlines(); // skip possible leading '\n'
  819. if(mode()==EVAL_MODE) {
  820. EXPR_TUPLE();
  821. consume(TK("@eof"));
  822. ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  823. pop_context();
  824. return code;
  825. }else if(mode()==JSON_MODE){
  826. PyObject* value = read_literal();
  827. if(value != nullptr) emit(OP_LOAD_CONST, code->add_const(value));
  828. else if(match(TK("{"))) exprMap();
  829. else if(match(TK("["))) exprList();
  830. else SyntaxError("expect a JSON object or array");
  831. consume(TK("@eof"));
  832. ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  833. pop_context();
  834. return code;
  835. }
  836. while (!match(TK("@eof"))) {
  837. if (match(TK("class"))) {
  838. compile_class();
  839. } else {
  840. compile_stmt();
  841. }
  842. match_newlines();
  843. }
  844. pop_context();
  845. return code;
  846. }
  847. };
  848. } // namespace pkpy