compiler.h 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903
  1. #pragma once
  2. #include "codeobject.h"
  3. #include "common.h"
  4. #include "lexer.h"
  5. #include "error.h"
  6. #include "ceval.h"
  7. #include "expr.h"
  8. #include "obj.h"
  9. #include "str.h"
  10. namespace pkpy{
  11. class Compiler;
  12. typedef void (Compiler::*PrattCallback)();
  13. struct PrattRule{
  14. PrattCallback prefix;
  15. PrattCallback infix;
  16. Precedence precedence;
  17. };
  18. class Compiler {
  19. std::unique_ptr<Lexer> lexer;
  20. stack<CodeEmitContext> contexts;
  21. std::map<TokenIndex, PrattRule> rules;
  22. VM* vm;
  23. bool used;
  24. // for parsing token stream
  25. int i = 0;
  26. std::vector<Token> tokens;
  27. const Token& prev() { return tokens.at(i-1); }
  28. const Token& curr() { return tokens.at(i); }
  29. const Token& next() { return tokens.at(i+1); }
  30. const Token& peek(int offset) { return tokens.at(i+offset); }
  31. void advance() { i++; }
  32. CodeEmitContext* ctx() { return &contexts.top(); }
  33. CompileMode mode() const{ return lexer->src->mode; }
  34. NameScope name_scope() const { return contexts.size()>1 ? NAME_LOCAL : NAME_GLOBAL; }
  35. template<typename... Args>
  36. CodeObject_ push_context(Args&&... args){
  37. CodeObject_ co = make_sp<CodeObject>(std::forward<Args>(args)...);
  38. contexts.push(CodeEmitContext(vm, co));
  39. return co;
  40. }
  41. void pop_context(){
  42. if(!ctx()->s_expr.empty()) UNREACHABLE();
  43. // if last instruction is not return, add a default return None
  44. if(ctx()->co->codes.back().op != OP_RETURN_VALUE){
  45. ctx()->emit(OP_LOAD_NONE, BC_NOARG, BC_KEEPLINE);
  46. ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  47. }
  48. ctx()->co->optimize(vm);
  49. contexts.pop();
  50. }
  51. public:
  52. Compiler(VM* vm, const char* source, Str filename, CompileMode mode){
  53. this->vm = vm;
  54. this->used = false;
  55. this->lexer = std::make_unique<Lexer>(
  56. make_sp<SourceData>(source, filename, mode)
  57. );
  58. // http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
  59. #define METHOD(name) &Compiler::name
  60. #define NO_INFIX nullptr, PREC_NONE
  61. for(TokenIndex i=0; i<kTokenCount; i++) rules[i] = { nullptr, NO_INFIX };
  62. rules[TK(".")] = { nullptr, METHOD(exprAttrib), PREC_ATTRIB };
  63. rules[TK("(")] = { METHOD(exprGroup), METHOD(exprCall), PREC_CALL };
  64. rules[TK("[")] = { METHOD(exprList), METHOD(exprSubscr), PREC_SUBSCRIPT };
  65. rules[TK("{")] = { METHOD(exprMap), NO_INFIX };
  66. rules[TK("%")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  67. rules[TK("+")] = { nullptr, METHOD(exprBinaryOp), PREC_TERM };
  68. rules[TK("-")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_TERM };
  69. rules[TK("*")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_FACTOR };
  70. rules[TK("/")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  71. rules[TK("//")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  72. rules[TK("**")] = { nullptr, METHOD(exprBinaryOp), PREC_EXPONENT };
  73. rules[TK(">")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  74. rules[TK("<")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  75. rules[TK("==")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY };
  76. rules[TK("!=")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY };
  77. rules[TK(">=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  78. rules[TK("<=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  79. rules[TK("in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  80. rules[TK("is")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  81. rules[TK("<<")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  82. rules[TK(">>")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  83. rules[TK("&")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_AND };
  84. rules[TK("|")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_OR };
  85. rules[TK("^")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_XOR };
  86. rules[TK("?")] = { nullptr, METHOD(exprTernary), PREC_TERNARY };
  87. rules[TK(",")] = { nullptr, METHOD(exprTuple), PREC_TUPLE };
  88. rules[TK("not in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  89. rules[TK("is not")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  90. rules[TK("and") ] = { nullptr, METHOD(exprAnd), PREC_LOGICAL_AND };
  91. rules[TK("or")] = { nullptr, METHOD(exprOr), PREC_LOGICAL_OR };
  92. rules[TK("not")] = { METHOD(exprNot), nullptr, PREC_LOGICAL_NOT };
  93. rules[TK("True")] = { METHOD(exprLiteral0), NO_INFIX };
  94. rules[TK("False")] = { METHOD(exprLiteral0), NO_INFIX };
  95. rules[TK("None")] = { METHOD(exprLiteral0), NO_INFIX };
  96. rules[TK("...")] = { METHOD(exprLiteral0), NO_INFIX };
  97. rules[TK("lambda")] = { METHOD(exprLambda), NO_INFIX };
  98. rules[TK("@id")] = { METHOD(exprName), NO_INFIX };
  99. rules[TK("@num")] = { METHOD(exprLiteral), NO_INFIX };
  100. rules[TK("@str")] = { METHOD(exprLiteral), NO_INFIX };
  101. rules[TK("@fstr")] = { METHOD(exprFString), NO_INFIX };
  102. #undef METHOD
  103. #undef NO_INFIX
  104. // rules[TK("=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  105. // rules[TK("+=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT };
  106. // rules[TK("-=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT };
  107. // rules[TK("*=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT };
  108. // rules[TK("/=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT };
  109. // rules[TK("//=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT };
  110. // rules[TK("%=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT };
  111. // rules[TK("&=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT };
  112. // rules[TK("|=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT };
  113. // rules[TK("^=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT };
  114. // rules[TK(">>=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT };
  115. // rules[TK("<<=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT };
  116. }
  117. private:
  118. bool match(TokenIndex expected) {
  119. if (curr().type != expected) return false;
  120. advance();
  121. return true;
  122. }
  123. void consume(TokenIndex expected) {
  124. if (!match(expected)){
  125. StrStream ss;
  126. ss << "expected '" << TK_STR(expected) << "', but got '" << TK_STR(curr().type) << "'";
  127. SyntaxError(ss.str());
  128. }
  129. }
  130. bool match_newlines(bool repl_throw=false) {
  131. bool consumed = false;
  132. if (curr().type == TK("@eol")) {
  133. while (curr().type == TK("@eol")) advance();
  134. consumed = true;
  135. }
  136. if (repl_throw && curr().type == TK("@eof")){
  137. throw NeedMoreLines(ctx()->is_compiling_class);
  138. }
  139. return consumed;
  140. }
  141. bool match_end_stmt() {
  142. if (match(TK(";"))) { match_newlines(); return true; }
  143. if (match_newlines() || curr().type == TK("@eof")) return true;
  144. if (curr().type == TK("@dedent")) return true;
  145. return false;
  146. }
  147. void consume_end_stmt() {
  148. if (!match_end_stmt()) SyntaxError("expected statement end");
  149. }
  150. /*************************************************/
  151. void EXPR(bool push_stack=true) {
  152. parse_expression(PREC_TUPLE+1, push_stack);
  153. }
  154. void EXPR_TUPLE(bool push_stack=true) {
  155. parse_expression(PREC_TUPLE, push_stack);
  156. }
  157. template <typename T, typename... Args>
  158. std::unique_ptr<T> make_expr(Args&&... args) {
  159. std::unique_ptr<T> expr = std::make_unique<T>(std::forward<Args>(args)...);
  160. expr->line = prev().line;
  161. return expr;
  162. }
  163. // PASS
  164. void exprLiteral(){
  165. ctx()->s_expr.push(make_expr<LiteralExpr>(prev().value));
  166. }
  167. // PASS
  168. void exprFString(){
  169. ctx()->s_expr.push(make_expr<FStringExpr>(std::get<Str>(prev().value)));
  170. }
  171. // PASS
  172. void exprLambda(){
  173. auto e = make_expr<LambdaExpr>();
  174. e->func.name = "<lambda>";
  175. e->scope = name_scope();
  176. if(!match(TK(":"))){
  177. _compile_f_args(e->func, false);
  178. consume(TK(":"));
  179. }
  180. e->func.code = push_context(lexer->src, "<lambda>");
  181. EXPR(true); // https://github.com/blueloveTH/pocketpy/issues/37
  182. ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  183. pop_context();
  184. ctx()->s_expr.push(std::move(e));
  185. }
  186. // PASS
  187. void exprTuple(){
  188. auto e = make_expr<TupleExpr>();
  189. do {
  190. EXPR(); // NOTE: "1," will fail, "1,2" will be ok
  191. e->items.push_back(ctx()->s_expr.popx());
  192. } while(match(TK(",")));
  193. ctx()->s_expr.push(std::move(e));
  194. }
  195. // PASS
  196. void exprOr(){
  197. auto e = make_expr<OrExpr>();
  198. e->lhs = ctx()->s_expr.popx();
  199. parse_expression(PREC_LOGICAL_OR + 1);
  200. e->rhs = ctx()->s_expr.popx();
  201. ctx()->s_expr.push(std::move(e));
  202. }
  203. // PASS
  204. void exprAnd(){
  205. auto e = make_expr<AndExpr>();
  206. e->lhs = ctx()->s_expr.popx();
  207. parse_expression(PREC_LOGICAL_AND + 1);
  208. e->rhs = ctx()->s_expr.popx();
  209. ctx()->s_expr.push(std::move(e));
  210. }
  211. // PASS
  212. void exprTernary(){
  213. auto e = make_expr<TernaryExpr>();
  214. e->cond = ctx()->s_expr.popx();
  215. EXPR(); // if true
  216. e->true_expr = ctx()->s_expr.popx();
  217. consume(TK(":"));
  218. EXPR(); // if false
  219. e->false_expr = ctx()->s_expr.popx();
  220. ctx()->s_expr.push(std::move(e));
  221. }
  222. // PASS
  223. void exprBinaryOp(){
  224. auto e = make_expr<BinaryExpr>();
  225. e->op = prev().type;
  226. e->lhs = ctx()->s_expr.popx();
  227. parse_expression(rules[e->op].precedence + 1);
  228. e->rhs = ctx()->s_expr.popx();
  229. ctx()->s_expr.push(std::move(e));
  230. }
  231. // PASS
  232. void exprNot() {
  233. parse_expression(PREC_LOGICAL_NOT + 1);
  234. ctx()->s_expr.push(make_expr<NotExpr>(ctx()->s_expr.popx()));
  235. }
  236. // PASS
  237. void exprUnaryOp(){
  238. TokenIndex op = prev().type;
  239. parse_expression(PREC_UNARY + 1);
  240. switch(op){
  241. case TK("-"):
  242. ctx()->s_expr.push(make_expr<NegatedExpr>(ctx()->s_expr.popx()));
  243. break;
  244. case TK("*"):
  245. ctx()->s_expr.push(make_expr<StarredExpr>(ctx()->s_expr.popx()));
  246. break;
  247. default: UNREACHABLE();
  248. }
  249. }
  250. // PASS
  251. void exprGroup(){
  252. match_newlines(mode()==REPL_MODE);
  253. EXPR_TUPLE(); // () is just for change precedence
  254. match_newlines(mode()==REPL_MODE);
  255. consume(TK(")"));
  256. }
  257. // PASS
  258. template<typename T>
  259. void _consume_comp(Expr_ expr){
  260. static_assert(std::is_base_of<CompExpr, T>::value);
  261. std::unique_ptr<CompExpr> ce = std::make_unique<T>();
  262. ce->expr = std::move(expr);
  263. EXPR_TUPLE(); // must be a lvalue
  264. ce->vars = ctx()->s_expr.popx();
  265. consume(TK("in"));
  266. EXPR();
  267. ce->iter = ctx()->s_expr.popx();
  268. match_newlines(mode()==REPL_MODE);
  269. if(match(TK("if"))){
  270. EXPR();
  271. ce->cond = ctx()->s_expr.popx();
  272. }
  273. ctx()->s_expr.push(std::move(ce));
  274. match_newlines(mode()==REPL_MODE);
  275. }
  276. // PASS
  277. void exprList() {
  278. int line = prev().line;
  279. std::vector<Expr_> items;
  280. do {
  281. match_newlines(mode()==REPL_MODE);
  282. if (curr().type == TK("]")) break;
  283. EXPR();
  284. items.push_back(ctx()->s_expr.popx());
  285. match_newlines(mode()==REPL_MODE);
  286. if(items.size()==1 && match(TK("for"))){
  287. _consume_comp<ListCompExpr>(std::move(items[0]));
  288. consume(TK("]"));
  289. return;
  290. }
  291. match_newlines(mode()==REPL_MODE);
  292. } while (match(TK(",")));
  293. consume(TK("]"));
  294. auto e = make_expr<ListExpr>(std::move(items));
  295. e->line = line; // override line
  296. ctx()->s_expr.push(std::move(e));
  297. }
  298. // PASS
  299. void exprMap() {
  300. bool parsing_dict = false; // {...} may be dict or set
  301. std::vector<Expr_> items;
  302. do {
  303. match_newlines(mode()==REPL_MODE);
  304. if (curr().type == TK("}")) break;
  305. EXPR();
  306. if(curr().type == TK(":")) parsing_dict = true;
  307. if(parsing_dict){
  308. consume(TK(":"));
  309. EXPR();
  310. auto dict_item = make_expr<DictItemExpr>();
  311. dict_item->key = ctx()->s_expr.popx();
  312. dict_item->value = ctx()->s_expr.popx();
  313. items.push_back(std::move(dict_item));
  314. }else{
  315. items.push_back(ctx()->s_expr.popx());
  316. }
  317. match_newlines(mode()==REPL_MODE);
  318. if(items.size()==1 && match(TK("for"))){
  319. if(parsing_dict) _consume_comp<DictCompExpr>(std::move(items[0]));
  320. else _consume_comp<SetCompExpr>(std::move(items[0]));
  321. consume(TK("}"));
  322. return;
  323. }
  324. match_newlines(mode()==REPL_MODE);
  325. } while (match(TK(",")));
  326. consume(TK("}"));
  327. if(items.size()==0 || parsing_dict){
  328. auto e = make_expr<DictExpr>(std::move(items));
  329. ctx()->s_expr.push(std::move(e));
  330. }else{
  331. auto e = make_expr<SetExpr>(std::move(items));
  332. ctx()->s_expr.push(std::move(e));
  333. }
  334. }
  335. // PASS
  336. void exprCall() {
  337. auto e = make_expr<CallExpr>();
  338. e->callable = ctx()->s_expr.popx();
  339. do {
  340. match_newlines(mode()==REPL_MODE);
  341. if (curr().type==TK(")")) break;
  342. if(curr().type==TK("@id") && next().type==TK("=")) {
  343. consume(TK("@id"));
  344. Str key = prev().str();
  345. consume(TK("="));
  346. EXPR();
  347. e->kwargs.push_back({key, ctx()->s_expr.popx()});
  348. } else{
  349. if(!e->kwargs.empty()) SyntaxError("positional argument follows keyword argument");
  350. EXPR();
  351. e->args.push_back(ctx()->s_expr.popx());
  352. }
  353. match_newlines(mode()==REPL_MODE);
  354. } while (match(TK(",")));
  355. consume(TK(")"));
  356. if(e->args.size() > 32767) SyntaxError("too many positional arguments");
  357. if(e->kwargs.size() > 32767) SyntaxError("too many keyword arguments");
  358. ctx()->s_expr.push(std::move(e));
  359. }
  360. // PASS
  361. void exprName(){
  362. ctx()->s_expr.push(make_expr<NameExpr>(prev().str(), name_scope()));
  363. }
  364. // PASS
  365. void exprAttrib() {
  366. consume(TK("@id"));
  367. ctx()->s_expr.push(
  368. make_expr<AttribExpr>(ctx()->s_expr.popx(), prev().str())
  369. );
  370. }
  371. // PASS
  372. void exprSubscr() {
  373. auto e = make_expr<SubscrExpr>();
  374. std::vector<Expr_> items;
  375. do {
  376. EXPR_TUPLE();
  377. items.push_back(ctx()->s_expr.popx());
  378. } while(match(TK(":")));
  379. consume(TK("]"));
  380. switch(items.size()){
  381. case 1:
  382. e->b = std::move(items[0]);
  383. break;
  384. case 2: case 3: {
  385. auto slice = make_expr<SliceExpr>();
  386. slice->start = std::move(items[0]);
  387. slice->stop = std::move(items[1]);
  388. if(items.size()==3){
  389. slice->step = std::move(items[2]);
  390. }
  391. e->b = std::move(slice);
  392. } break;
  393. default: SyntaxError(); break;
  394. }
  395. ctx()->s_expr.push(std::move(e));
  396. }
  397. // PASS
  398. void exprLiteral0() {
  399. ctx()->s_expr.push(make_expr<Literal0Expr>(prev().type));
  400. }
  401. void compile_block_body() {
  402. consume(TK(":"));
  403. if(curr().type!=TK("@eol") && curr().type!=TK("@eof")){
  404. compile_stmt(); // inline block
  405. return;
  406. }
  407. if(!match_newlines(mode()==REPL_MODE)){
  408. SyntaxError("expected a new line after ':'");
  409. }
  410. consume(TK("@indent"));
  411. while (curr().type != TK("@dedent")) {
  412. match_newlines();
  413. compile_stmt();
  414. match_newlines();
  415. }
  416. consume(TK("@dedent"));
  417. }
  418. Str _compile_import() {
  419. consume(TK("@id"));
  420. Str name = prev().str();
  421. int index = ctx()->add_name(name, NAME_SPECIAL);
  422. ctx()->emit(OP_IMPORT_NAME, index, peek(-2).line);
  423. return name;
  424. }
  425. // import a as b
  426. void compile_normal_import() {
  427. do {
  428. Str name = _compile_import();
  429. if (match(TK("as"))) {
  430. consume(TK("@id"));
  431. name = prev().str();
  432. }
  433. int index = ctx()->add_name(name, name_scope());
  434. ctx()->emit(OP_STORE_NAME, index, prev().line);
  435. } while (match(TK(",")));
  436. consume_end_stmt();
  437. }
  438. // from a import b as c, d as e
  439. void compile_from_import() {
  440. _compile_import();
  441. consume(TK("import"));
  442. if (match(TK("*"))) {
  443. if(name_scope() != NAME_GLOBAL) SyntaxError("import * can only be used in global scope");
  444. ctx()->emit(OP_STORE_ALL_NAMES, BC_NOARG, prev().line);
  445. consume_end_stmt();
  446. return;
  447. }
  448. do {
  449. ctx()->emit(OP_DUP_TOP, BC_NOARG, BC_KEEPLINE);
  450. consume(TK("@id"));
  451. Str name = prev().str();
  452. int index = ctx()->add_name(name);
  453. ctx()->emit(OP_LOAD_ATTR, index, prev().line);
  454. if (match(TK("as"))) {
  455. consume(TK("@id"));
  456. name = prev().str();
  457. }
  458. index = ctx()->add_name(name);
  459. ctx()->emit(OP_STORE_GLOBAL, index, prev().line);
  460. } while (match(TK(",")));
  461. ctx()->emit(OP_POP_TOP, BC_NOARG, BC_KEEPLINE);
  462. consume_end_stmt();
  463. }
  464. void parse_expression(int precedence, bool push_stack=true) {
  465. advance();
  466. PrattCallback prefix = rules[prev().type].prefix;
  467. if (prefix == nullptr) SyntaxError(Str("expected an expression, but got ") + TK_STR(prev().type));
  468. (this->*prefix)();
  469. while (rules[curr().type].precedence >= precedence) {
  470. TokenIndex op = curr().type;
  471. advance();
  472. PrattCallback infix = rules[op].infix;
  473. if(infix == nullptr) throw std::runtime_error("(infix == nullptr) is true");
  474. (this->*infix)();
  475. }
  476. if(!push_stack) ctx()->emit_expr();
  477. }
  478. void compile_if_stmt() {
  479. EXPR(true); // condition
  480. int patch = ctx()->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, prev().line);
  481. compile_block_body();
  482. if (match(TK("elif"))) {
  483. int exit_patch = ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, prev().line);
  484. ctx()->patch_jump(patch);
  485. compile_if_stmt();
  486. ctx()->patch_jump(exit_patch);
  487. } else if (match(TK("else"))) {
  488. int exit_patch = ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, prev().line);
  489. ctx()->patch_jump(patch);
  490. compile_block_body();
  491. ctx()->patch_jump(exit_patch);
  492. } else {
  493. ctx()->patch_jump(patch);
  494. }
  495. }
  496. void compile_while_loop() {
  497. ctx()->enter_block(WHILE_LOOP);
  498. EXPR(true); // condition
  499. int patch = ctx()->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, prev().line);
  500. compile_block_body();
  501. ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, BC_KEEPLINE);
  502. ctx()->patch_jump(patch);
  503. ctx()->exit_block();
  504. }
  505. void compile_for_loop() {
  506. EXPR_TUPLE();
  507. ctx()->emit_lvalue();
  508. consume(TK("in"));
  509. EXPR(true);
  510. ctx()->emit(OP_GET_ITER, BC_NOARG, BC_KEEPLINE);
  511. ctx()->enter_block(FOR_LOOP);
  512. ctx()->emit(OP_FOR_ITER, BC_NOARG, BC_KEEPLINE);
  513. compile_block_body();
  514. ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, BC_KEEPLINE);
  515. ctx()->exit_block();
  516. }
  517. void compile_try_except() {
  518. ctx()->enter_block(TRY_EXCEPT);
  519. ctx()->emit(OP_TRY_BLOCK_ENTER, BC_NOARG, prev().line);
  520. compile_block_body();
  521. ctx()->emit(OP_TRY_BLOCK_EXIT, BC_NOARG, BC_KEEPLINE);
  522. std::vector<int> patches = {
  523. ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, BC_KEEPLINE)
  524. };
  525. ctx()->exit_block();
  526. do {
  527. consume(TK("except"));
  528. if(match(TK("@id"))){
  529. int name_idx = ctx()->add_name(prev().str(), NAME_SPECIAL);
  530. emit(OP_EXCEPTION_MATCH, name_idx);
  531. }else{
  532. emit(OP_LOAD_TRUE);
  533. }
  534. int patch = emit(OP_POP_JUMP_IF_FALSE);
  535. emit(OP_POP_TOP); // pop the exception on match
  536. compile_block_body();
  537. patches.push_back(emit(OP_JUMP_ABSOLUTE));
  538. patch_jump(patch);
  539. }while(curr().type == TK("except"));
  540. emit(OP_RE_RAISE); // no match, re-raise
  541. for (int patch : patches) patch_jump(patch);
  542. }
  543. void compile_decorated(){
  544. EXPR(true);
  545. if(!match_newlines(mode()==REPL_MODE)) SyntaxError();
  546. ctx()->emit(OP_SETUP_DECORATOR, BC_NOARG, prev().line);
  547. consume(TK("def"));
  548. compile_function();
  549. }
  /// Placeholder for assignment / in-place assignment compilation.
  ///
  /// Not implemented yet: it always reports "not an assignment". Without an
  /// explicit return, flowing off the end of this non-void function is
  /// undefined behaviour and the caller would branch on garbage.
  /// @return false — the caller should treat the statement as a plain expression.
  bool try_compile_assignment(){
      // Intended operator mapping, kept for the upcoming implementation:
      // switch (op) {
      // case TK("+="): emit(OP_BINARY_OP, 0); break;
      // case TK("-="): emit(OP_BINARY_OP, 1); break;
      // case TK("*="): emit(OP_BINARY_OP, 2); break;
      // case TK("/="): emit(OP_BINARY_OP, 3); break;
      // case TK("//="): emit(OP_BINARY_OP, 4); break;
      // case TK("%="): emit(OP_BINARY_OP, 5); break;
      // case TK("<<="): emit(OP_BITWISE_OP, 0); break;
      // case TK(">>="): emit(OP_BITWISE_OP, 1); break;
      // case TK("&="): emit(OP_BITWISE_OP, 2); break;
      // case TK("|="): emit(OP_BITWISE_OP, 3); break;
      // case TK("^="): emit(OP_BITWISE_OP, 4); break;
      // default: UNREACHABLE();
      // }
      return false;
  }
  566. void compile_stmt() {
  567. advance();
  568. int kw_line = prev().line; // backup line number
  569. switch(prev().type){
  570. case TK("break"):
  571. if (!ctx()->is_curr_block_loop()) SyntaxError("'break' outside loop");
  572. ctx()->emit(OP_LOOP_BREAK, BC_NOARG, kw_line);
  573. consume_end_stmt();
  574. break;
  575. case TK("continue"):
  576. if (!ctx()->is_curr_block_loop()) SyntaxError("'continue' not properly in loop");
  577. ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, kw_line);
  578. consume_end_stmt();
  579. break;
  580. case TK("yield"):
  581. if (contexts.size() <= 1) SyntaxError("'yield' outside function");
  582. EXPR_TUPLE(true);
  583. // if yield present, mark the function as generator
  584. ctx()->co->is_generator = true;
  585. ctx()->emit(OP_YIELD_VALUE, BC_NOARG, kw_line);
  586. consume_end_stmt();
  587. break;
  588. case TK("return"):
  589. if (contexts.size() <= 1) SyntaxError("'ret urn' outside function");
  590. if(match_end_stmt()){
  591. ctx()->emit(OP_LOAD_NONE, BC_NOARG, kw_line);
  592. }else{
  593. EXPR_TUPLE(true);
  594. consume_end_stmt();
  595. }
  596. ctx()->emit(OP_RETURN_VALUE, BC_NOARG, kw_line);
  597. break;
  598. /*************************************************/
  599. case TK("if"): compile_if_stmt(); break;
  600. case TK("while"): compile_while_loop(); break;
  601. case TK("for"): compile_for_loop(); break;
  602. case TK("import"): compile_normal_import(); break;
  603. case TK("from"): compile_from_import(); break;
  604. case TK("def"): compile_function(); break;
  605. case TK("@"): compile_decorated(); break;
  606. case TK("try"): compile_try_except(); break;
  607. case TK("pass"): consume_end_stmt(); break;
  608. /*************************************************/
  609. case TK("assert"):
  610. EXPR_TUPLE(true);
  611. // TODO: change OP_ASSERT impl in ceval.h
  612. ctx()->emit(OP_ASSERT, BC_NOARG, kw_line);
  613. consume_end_stmt();
  614. break;
  615. case TK("global"):
  616. do {
  617. consume(TK("@id"));
  618. co()->global_names.insert(prev().str());
  619. } while (match(TK(",")));
  620. consume_end_stmt();
  621. break;
  622. case TK("raise"): {
  623. consume(TK("@id"));
  624. int dummy_t = ctx()->add_name(prev().str(), NAME_SPECIAL);
  625. if(match(TK("(")) && !match(TK(")"))){
  626. EXPR(true); consume(TK(")"));
  627. }else{
  628. ctx()->emit(OP_LOAD_NONE, BC_NOARG, BC_KEEPLINE);
  629. }
  630. ctx()->emit(OP_RAISE, dummy_t, kw_line);
  631. consume_end_stmt();
  632. } break;
  633. case TK("del"): {
  634. EXPR_TUPLE();
  635. Expr_ e = ctx()->s_expr.popx();
  636. bool ok = e->emit_del(ctx());
  637. if(!ok) SyntaxError();
  638. consume_end_stmt();
  639. } break;
  640. case TK("with"): {
  641. // TODO: reimpl this
  642. UNREACHABLE();
  643. // EXPR(true);
  644. // consume(TK("as"));
  645. // consume(TK("@id"));
  646. // int index = ctx()->add_name(prev().str(), name_scope());
  647. // emit(OP_STORE_NAME, index);
  648. // emit(OP_LOAD_NAME_REF, index);
  649. // emit(OP_WITH_ENTER);
  650. // compile_block_body();
  651. // emit(OP_LOAD_NAME_REF, index);
  652. // emit(OP_WITH_EXIT);
  653. } break;
  654. /*************************************************/
  655. // TODO: refactor goto/label use special $ syntax
  656. case TK("label"):
  657. if(mode()!=EXEC_MODE) SyntaxError("'label' is only available in EXEC_MODE");
  658. consume(TK(".")); consume(TK("@id"));
  659. bool ok = co()->add_label(prev().str());
  660. if(!ok) SyntaxError("label " + prev().str().escape(true) + " already exists");
  661. consume_end_stmt();
  662. break;
  663. case TK("goto"):
  664. if(mode()!=EXEC_MODE) SyntaxError("'goto' is only available in EXEC_MODE");
  665. consume(TK(".")); consume(TK("@id"));
  666. emit(OP_GOTO, co()->add_name(prev().str(), NAME_SPECIAL));
  667. consume_end_stmt();
  668. break;
  669. /*************************************************/
  670. // handle dangling expression or assignment
  671. default: {
  672. EXPR_TUPLE(true);
  673. if(!try_compile_assignment()){
  674. if(mode()==REPL_MODE && name_scope()==NAME_GLOBAL){
  675. emit(OP_PRINT_EXPR, BC_NOARG, BC_KEEPLINE);
  676. }else{
  677. emit(OP_POP_TOP, BC_NOARG, BC_KEEPLINE);
  678. }
  679. }
  680. consume_end_stmt();
  681. }
  682. }
  683. }
  684. void compile_class(){
  685. consume(TK("@id"));
  686. int cls_name_idx = co()->add_name(prev().str(), NAME_GLOBAL);
  687. int super_cls_name_idx = -1;
  688. if(match(TK("(")) && match(TK("@id"))){
  689. super_cls_name_idx = co()->add_name(prev().str(), NAME_GLOBAL);
  690. consume(TK(")"));
  691. }
  692. if(super_cls_name_idx == -1) emit(OP_LOAD_NONE);
  693. else emit(OP_LOAD_NAME, super_cls_name_idx);
  694. emit(OP_BEGIN_CLASS, cls_name_idx);
  695. ctx()->is_compiling_class = true;
  696. compile_block_body();
  697. ctx()->is_compiling_class = false;
  698. emit(OP_END_CLASS);
  699. }
  700. void _compile_f_args(Function& func, bool enable_type_hints){
  701. int state = 0; // 0 for args, 1 for *args, 2 for k=v, 3 for **kwargs
  702. do {
  703. if(state == 3) SyntaxError("**kwargs should be the last argument");
  704. match_newlines();
  705. if(match(TK("*"))){
  706. if(state < 1) state = 1;
  707. else SyntaxError("*args should be placed before **kwargs");
  708. }
  709. else if(match(TK("**"))){
  710. state = 3;
  711. }
  712. consume(TK("@id"));
  713. const Str& name = prev().str();
  714. if(func.has_name(name)) SyntaxError("duplicate argument name");
  715. // eat type hints
  716. if(enable_type_hints && match(TK(":"))) consume(TK("@id"));
  717. if(state == 0 && curr().type == TK("=")) state = 2;
  718. switch (state)
  719. {
  720. case 0: func.args.push_back(name); break;
  721. case 1: func.starred_arg = name; state+=1; break;
  722. case 2: {
  723. consume(TK("="));
  724. PyObject* value = read_literal();
  725. if(value == nullptr){
  726. SyntaxError(Str("expect a literal, not ") + TK_STR(curr().type));
  727. }
  728. func.kwargs.set(name, value);
  729. func.kwargs_order.push_back(name);
  730. } break;
  731. case 3: SyntaxError("**kwargs is not supported yet"); break;
  732. }
  733. } while (match(TK(",")));
  734. }
  735. void compile_function(){
  736. // TODO: bug, if there are multiple decorators, will cause error
  737. bool has_decorator = !co()->codes.empty() && co()->codes.back().op == OP_SETUP_DECORATOR;
  738. Function func;
  739. StrName obj_name;
  740. consume(TK("@id"));
  741. func.name = prev().str();
  742. if(!ctx()->is_compiling_class && match(TK("::"))){
  743. consume(TK("@id"));
  744. obj_name = func.name;
  745. func.name = prev().str();
  746. }
  747. consume(TK("("));
  748. if (!match(TK(")"))) {
  749. _compile_f_args(func, true);
  750. consume(TK(")"));
  751. }
  752. if(match(TK("->"))){
  753. if(!match(TK("None"))) consume(TK("@id"));
  754. }
  755. func.code = push_context(lexer->src, func.name.str());
  756. compile_block_body();
  757. pop_context();
  758. emit(OP_LOAD_FUNCTION, co()->add_const(VAR(func)));
  759. if(name_scope() == NAME_LOCAL) emit(OP_SETUP_CLOSURE);
  760. if(!ctx()->is_compiling_class){
  761. if(obj_name.empty()){
  762. if(has_decorator) emit(OP_CALL, 1);
  763. emit(OP_STORE_NAME, co()->add_name(func.name, name_scope()));
  764. } else {
  765. if(has_decorator) SyntaxError("decorator is not supported here");
  766. emit(OP_LOAD_NAME, co()->add_name(obj_name, name_scope()));
  767. int index = co()->add_name(func.name, NAME_ATTR);
  768. emit(OP_BUILD_ATTR_REF, index);
  769. emit(OP_ROT_TWO);
  770. emit(OP_STORE_REF);
  771. }
  772. }else{
  773. if(has_decorator) emit(OP_CALL, 1);
  774. emit(OP_STORE_CLASS_ATTR, co()->add_name(func.name, name_scope()));
  775. }
  776. }
  777. PyObject* read_literal(){
  778. if(match(TK("-"))){
  779. consume(TK("@num"));
  780. PyObject* val = get_value(prev());
  781. return vm->num_negated(val);
  782. }
  783. if(match(TK("@num"))) return get_value(prev());
  784. if(match(TK("@str"))) return get_value(prev());
  785. if(match(TK("True"))) return VAR(true);
  786. if(match(TK("False"))) return VAR(false);
  787. if(match(TK("None"))) return vm->None;
  788. if(match(TK("..."))) return vm->Ellipsis;
  789. return nullptr;
  790. }
  791. void SyntaxError(Str msg){ lexer->throw_err("SyntaxError", msg, curr().line, curr().start); }
  792. void SyntaxError(){ lexer->throw_err("SyntaxError", "invalid syntax", curr().line, curr().start); }
  793. void IndentationError(Str msg){ lexer->throw_err("IndentationError", msg, curr().line, curr().start); }
  794. public:
  795. CodeObject_ compile(){
  796. if(used) UNREACHABLE();
  797. used = true;
  798. tokens = lexer->run();
  799. // if(lexer->src->filename == "<stdin>"){
  800. // for(auto& t: tokens) std::cout << t.info() << std::endl;
  801. // }
  802. CodeObject_ code = push_context(lexer->src, lexer->src->filename);
  803. advance(); // skip @sof, so prev() is always valid
  804. match_newlines(); // skip possible leading '\n'
  805. if(mode()==EVAL_MODE) {
  806. EXPR_TUPLE();
  807. consume(TK("@eof"));
  808. ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  809. pop_context();
  810. return code;
  811. }else if(mode()==JSON_MODE){
  812. PyObject* value = read_literal();
  813. if(value != nullptr) emit(OP_LOAD_CONST, code->add_const(value));
  814. else if(match(TK("{"))) exprMap();
  815. else if(match(TK("["))) exprList();
  816. else SyntaxError("expect a JSON object or array");
  817. consume(TK("@eof"));
  818. ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  819. pop_context();
  820. return code;
  821. }
  822. while (!match(TK("@eof"))) {
  823. if (match(TK("class"))) {
  824. compile_class();
  825. } else {
  826. compile_stmt();
  827. }
  828. match_newlines();
  829. }
  830. pop_context();
  831. return code;
  832. }
  833. };
  834. } // namespace pkpy