compiler.h 40 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065
  1. #pragma once
  2. #include "codeobject.h"
  3. #include "common.h"
  4. #include "expr.h"
  5. #include "obj.h"
  6. namespace pkpy{
  7. class Compiler;
  8. typedef void (Compiler::*PrattCallback)();
  9. struct PrattRule{
  10. PrattCallback prefix;
  11. PrattCallback infix;
  12. Precedence precedence;
  13. };
  14. class Compiler {
  15. inline static PrattRule rules[kTokenCount];
  16. std::unique_ptr<Lexer> lexer;
  17. stack<CodeEmitContext> contexts;
  18. VM* vm;
  19. bool unknown_global_scope; // for eval/exec() call
  20. bool used;
  21. // for parsing token stream
  22. int i = 0;
  23. std::vector<Token> tokens;
  24. const Token& prev() const{ return tokens.at(i-1); }
  25. const Token& curr() const{ return tokens.at(i); }
  26. const Token& next() const{ return tokens.at(i+1); }
  27. const Token& err() const{
  28. if(i >= tokens.size()) return prev();
  29. return curr();
  30. }
  31. void advance(int delta=1) { i += delta; }
  32. CodeEmitContext* ctx() { return &contexts.top(); }
  33. CompileMode mode() const{ return lexer->src->mode; }
  34. NameScope name_scope() const {
  35. auto s = contexts.size()>1 ? NAME_LOCAL : NAME_GLOBAL;
  36. if(unknown_global_scope && s == NAME_GLOBAL) s = NAME_GLOBAL_UNKNOWN;
  37. return s;
  38. }
  39. CodeObject_ push_global_context(){
  40. CodeObject_ co = make_sp<CodeObject>(lexer->src, lexer->src->filename);
  41. contexts.push(CodeEmitContext(vm, co, contexts.size()));
  42. return co;
  43. }
  44. FuncDecl_ push_f_context(Str name){
  45. FuncDecl_ decl = make_sp<FuncDecl>();
  46. decl->code = make_sp<CodeObject>(lexer->src, name);
  47. decl->nested = name_scope() == NAME_LOCAL;
  48. contexts.push(CodeEmitContext(vm, decl->code, contexts.size()));
  49. return decl;
  50. }
  51. void pop_context(){
  52. if(!ctx()->s_expr.empty()){
  53. throw std::runtime_error("!ctx()->s_expr.empty()\n" + ctx()->_log_s_expr());
  54. }
  55. // add a `return None` in the end as a guard
  56. // previously, we only do this if the last opcode is not a return
  57. // however, this is buggy...since there may be a jump to the end (out of bound) even if the last opcode is a return
  58. ctx()->emit(OP_LOAD_NONE, BC_NOARG, BC_KEEPLINE);
  59. ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  60. // ctx()->co->optimize(vm);
  61. if(ctx()->co->varnames.size() > PK_MAX_CO_VARNAMES){
  62. SyntaxError("maximum number of local variables exceeded");
  63. }
  64. contexts.pop();
  65. }
  66. static void init_pratt_rules(){
  67. if(rules[TK(".")].precedence != PREC_NONE) return;
  68. // http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
  69. #define METHOD(name) &Compiler::name
  70. #define NO_INFIX nullptr, PREC_NONE
  71. for(TokenIndex i=0; i<kTokenCount; i++) rules[i] = { nullptr, NO_INFIX };
  72. rules[TK(".")] = { nullptr, METHOD(exprAttrib), PREC_ATTRIB };
  73. rules[TK("(")] = { METHOD(exprGroup), METHOD(exprCall), PREC_CALL };
  74. rules[TK("[")] = { METHOD(exprList), METHOD(exprSubscr), PREC_SUBSCRIPT };
  75. rules[TK("{")] = { METHOD(exprMap), NO_INFIX };
  76. rules[TK("%")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  77. rules[TK("+")] = { nullptr, METHOD(exprBinaryOp), PREC_TERM };
  78. rules[TK("-")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_TERM };
  79. rules[TK("*")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_FACTOR };
  80. rules[TK("/")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  81. rules[TK("//")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  82. rules[TK("**")] = { nullptr, METHOD(exprBinaryOp), PREC_EXPONENT };
  83. rules[TK(">")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  84. rules[TK("<")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  85. rules[TK("==")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY };
  86. rules[TK("!=")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY };
  87. rules[TK(">=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  88. rules[TK("<=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  89. rules[TK("in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  90. rules[TK("is")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  91. rules[TK("<<")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  92. rules[TK(">>")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  93. rules[TK("&")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_AND };
  94. rules[TK("|")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_OR };
  95. rules[TK("^")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_XOR };
  96. rules[TK("@")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  97. rules[TK("if")] = { nullptr, METHOD(exprTernary), PREC_TERNARY };
  98. rules[TK(",")] = { nullptr, METHOD(exprTuple), PREC_TUPLE };
  99. rules[TK("not in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  100. rules[TK("is not")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  101. rules[TK("and") ] = { nullptr, METHOD(exprAnd), PREC_LOGICAL_AND };
  102. rules[TK("or")] = { nullptr, METHOD(exprOr), PREC_LOGICAL_OR };
  103. rules[TK("not")] = { METHOD(exprNot), nullptr, PREC_LOGICAL_NOT };
  104. rules[TK("True")] = { METHOD(exprLiteral0), NO_INFIX };
  105. rules[TK("False")] = { METHOD(exprLiteral0), NO_INFIX };
  106. rules[TK("None")] = { METHOD(exprLiteral0), NO_INFIX };
  107. rules[TK("...")] = { METHOD(exprLiteral0), NO_INFIX };
  108. rules[TK("lambda")] = { METHOD(exprLambda), NO_INFIX };
  109. rules[TK("@id")] = { METHOD(exprName), NO_INFIX };
  110. rules[TK("@num")] = { METHOD(exprLiteral), NO_INFIX };
  111. rules[TK("@str")] = { METHOD(exprLiteral), NO_INFIX };
  112. rules[TK("@fstr")] = { METHOD(exprFString), NO_INFIX };
  113. #undef METHOD
  114. #undef NO_INFIX
  115. }
  116. bool match(TokenIndex expected) {
  117. if (curr().type != expected) return false;
  118. advance();
  119. return true;
  120. }
  121. void consume(TokenIndex expected) {
  122. if (!match(expected)){
  123. SyntaxError(
  124. fmt("expected '", TK_STR(expected), "', but got '", TK_STR(curr().type), "'")
  125. );
  126. }
  127. }
  128. bool match_newlines_repl(){
  129. return match_newlines(mode()==REPL_MODE);
  130. }
  131. bool match_newlines(bool repl_throw=false) {
  132. bool consumed = false;
  133. if (curr().type == TK("@eol")) {
  134. while (curr().type == TK("@eol")) advance();
  135. consumed = true;
  136. }
  137. if (repl_throw && curr().type == TK("@eof")){
  138. throw NeedMoreLines(ctx()->is_compiling_class);
  139. }
  140. return consumed;
  141. }
  142. bool match_end_stmt() {
  143. if (match(TK(";"))) { match_newlines(); return true; }
  144. if (match_newlines() || curr().type == TK("@eof")) return true;
  145. if (curr().type == TK("@dedent")) return true;
  146. return false;
  147. }
  148. void consume_end_stmt() {
  149. if (!match_end_stmt()) SyntaxError("expected statement end");
  150. }
  151. /*************************************************/
  152. void EXPR(bool push_stack=true) {
  153. parse_expression(PREC_TUPLE+1, push_stack);
  154. }
  155. void EXPR_TUPLE(bool push_stack=true) {
  156. parse_expression(PREC_TUPLE, push_stack);
  157. }
  158. // special case for `for loop` and `comp`
  159. Expr_ EXPR_VARS(){
  160. std::vector<Expr_> items;
  161. do {
  162. consume(TK("@id"));
  163. items.push_back(make_expr<NameExpr>(prev().str(), name_scope()));
  164. } while(match(TK(",")));
  165. if(items.size()==1) return std::move(items[0]);
  166. return make_expr<TupleExpr>(std::move(items));
  167. }
  168. template <typename T, typename... Args>
  169. std::unique_ptr<T> make_expr(Args&&... args) {
  170. std::unique_ptr<T> expr = std::make_unique<T>(std::forward<Args>(args)...);
  171. expr->line = prev().line;
  172. return expr;
  173. }
  174. void exprLiteral(){
  175. ctx()->s_expr.push(make_expr<LiteralExpr>(prev().value));
  176. }
  177. void exprFString(){
  178. ctx()->s_expr.push(make_expr<FStringExpr>(std::get<Str>(prev().value)));
  179. }
  180. void exprLambda(){
  181. FuncDecl_ decl = push_f_context("<lambda>");
  182. auto e = make_expr<LambdaExpr>(decl);
  183. if(!match(TK(":"))){
  184. _compile_f_args(e->decl, false);
  185. consume(TK(":"));
  186. }
  187. // https://github.com/blueloveTH/pocketpy/issues/37
  188. parse_expression(PREC_LAMBDA + 1, false);
  189. ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  190. pop_context();
  191. ctx()->s_expr.push(std::move(e));
  192. }
  193. void exprTuple(){
  194. std::vector<Expr_> items;
  195. items.push_back(ctx()->s_expr.popx());
  196. do {
  197. EXPR(); // NOTE: "1," will fail, "1,2" will be ok
  198. items.push_back(ctx()->s_expr.popx());
  199. } while(match(TK(",")));
  200. ctx()->s_expr.push(make_expr<TupleExpr>(
  201. std::move(items)
  202. ));
  203. }
  204. void exprOr(){
  205. auto e = make_expr<OrExpr>();
  206. e->lhs = ctx()->s_expr.popx();
  207. parse_expression(PREC_LOGICAL_OR + 1);
  208. e->rhs = ctx()->s_expr.popx();
  209. ctx()->s_expr.push(std::move(e));
  210. }
  211. void exprAnd(){
  212. auto e = make_expr<AndExpr>();
  213. e->lhs = ctx()->s_expr.popx();
  214. parse_expression(PREC_LOGICAL_AND + 1);
  215. e->rhs = ctx()->s_expr.popx();
  216. ctx()->s_expr.push(std::move(e));
  217. }
  218. void exprTernary(){
  219. auto e = make_expr<TernaryExpr>();
  220. e->true_expr = ctx()->s_expr.popx();
  221. // cond
  222. parse_expression(PREC_TERNARY + 1);
  223. e->cond = ctx()->s_expr.popx();
  224. consume(TK("else"));
  225. // if false
  226. parse_expression(PREC_TERNARY + 1);
  227. e->false_expr = ctx()->s_expr.popx();
  228. ctx()->s_expr.push(std::move(e));
  229. }
  230. void exprBinaryOp(){
  231. auto e = make_expr<BinaryExpr>();
  232. e->op = prev().type;
  233. e->lhs = ctx()->s_expr.popx();
  234. parse_expression(rules[e->op].precedence + 1);
  235. e->rhs = ctx()->s_expr.popx();
  236. ctx()->s_expr.push(std::move(e));
  237. }
  238. void exprNot() {
  239. parse_expression(PREC_LOGICAL_NOT + 1);
  240. ctx()->s_expr.push(make_expr<NotExpr>(ctx()->s_expr.popx()));
  241. }
  242. void exprUnaryOp(){
  243. TokenIndex op = prev().type;
  244. parse_expression(PREC_UNARY + 1);
  245. switch(op){
  246. case TK("-"):
  247. ctx()->s_expr.push(make_expr<NegatedExpr>(ctx()->s_expr.popx()));
  248. break;
  249. case TK("*"):
  250. ctx()->s_expr.push(make_expr<StarredExpr>(ctx()->s_expr.popx()));
  251. break;
  252. default: FATAL_ERROR();
  253. }
  254. }
  255. void exprGroup(){
  256. match_newlines_repl();
  257. EXPR_TUPLE(); // () is just for change precedence
  258. match_newlines_repl();
  259. consume(TK(")"));
  260. }
  261. template<typename T>
  262. void _consume_comp(Expr_ expr){
  263. static_assert(std::is_base_of<CompExpr, T>::value);
  264. std::unique_ptr<CompExpr> ce = make_expr<T>();
  265. ce->expr = std::move(expr);
  266. ce->vars = EXPR_VARS();
  267. consume(TK("in"));
  268. parse_expression(PREC_TERNARY + 1);
  269. ce->iter = ctx()->s_expr.popx();
  270. match_newlines_repl();
  271. if(match(TK("if"))){
  272. parse_expression(PREC_TERNARY + 1);
  273. ce->cond = ctx()->s_expr.popx();
  274. }
  275. ctx()->s_expr.push(std::move(ce));
  276. match_newlines_repl();
  277. }
  278. void exprList() {
  279. int line = prev().line;
  280. std::vector<Expr_> items;
  281. do {
  282. match_newlines_repl();
  283. if (curr().type == TK("]")) break;
  284. EXPR();
  285. items.push_back(ctx()->s_expr.popx());
  286. if(items.back()->is_starred()) SyntaxError();
  287. match_newlines_repl();
  288. if(items.size()==1 && match(TK("for"))){
  289. _consume_comp<ListCompExpr>(std::move(items[0]));
  290. consume(TK("]"));
  291. return;
  292. }
  293. match_newlines_repl();
  294. } while (match(TK(",")));
  295. consume(TK("]"));
  296. auto e = make_expr<ListExpr>(std::move(items));
  297. e->line = line; // override line
  298. ctx()->s_expr.push(std::move(e));
  299. }
  300. void exprMap() {
  301. bool parsing_dict = false; // {...} may be dict or set
  302. std::vector<Expr_> items;
  303. do {
  304. match_newlines_repl();
  305. if (curr().type == TK("}")) break;
  306. EXPR();
  307. if(curr().type == TK(":")) parsing_dict = true;
  308. if(parsing_dict){
  309. consume(TK(":"));
  310. EXPR();
  311. auto dict_item = make_expr<DictItemExpr>();
  312. dict_item->key = ctx()->s_expr.popx();
  313. dict_item->value = ctx()->s_expr.popx();
  314. if(dict_item->key->is_starred()) SyntaxError();
  315. if(dict_item->value->is_starred()) SyntaxError();
  316. items.push_back(std::move(dict_item));
  317. }else{
  318. items.push_back(ctx()->s_expr.popx());
  319. if(items.back()->is_starred()) SyntaxError();
  320. }
  321. match_newlines_repl();
  322. if(items.size()==1 && match(TK("for"))){
  323. if(parsing_dict) _consume_comp<DictCompExpr>(std::move(items[0]));
  324. else _consume_comp<SetCompExpr>(std::move(items[0]));
  325. consume(TK("}"));
  326. return;
  327. }
  328. match_newlines_repl();
  329. } while (match(TK(",")));
  330. consume(TK("}"));
  331. if(items.size()==0 || parsing_dict){
  332. auto e = make_expr<DictExpr>(std::move(items));
  333. ctx()->s_expr.push(std::move(e));
  334. }else{
  335. auto e = make_expr<SetExpr>(std::move(items));
  336. ctx()->s_expr.push(std::move(e));
  337. }
  338. }
  339. void exprCall() {
  340. auto e = make_expr<CallExpr>();
  341. e->callable = ctx()->s_expr.popx();
  342. do {
  343. match_newlines_repl();
  344. if (curr().type==TK(")")) break;
  345. if(curr().type==TK("@id") && next().type==TK("=")) {
  346. consume(TK("@id"));
  347. Str key = prev().str();
  348. consume(TK("="));
  349. EXPR();
  350. e->kwargs.push_back({key, ctx()->s_expr.popx()});
  351. } else{
  352. if(!e->kwargs.empty()) SyntaxError("positional argument follows keyword argument");
  353. EXPR();
  354. e->args.push_back(ctx()->s_expr.popx());
  355. }
  356. match_newlines_repl();
  357. } while (match(TK(",")));
  358. consume(TK(")"));
  359. if(e->args.size() > 32767) SyntaxError("too many positional arguments");
  360. if(e->kwargs.size() > 32767) SyntaxError("too many keyword arguments");
  361. ctx()->s_expr.push(std::move(e));
  362. }
  363. void exprName(){
  364. Str name = prev().str();
  365. NameScope scope = name_scope();
  366. if(ctx()->co->global_names.count(name)){
  367. scope = NAME_GLOBAL;
  368. }
  369. ctx()->s_expr.push(make_expr<NameExpr>(name, scope));
  370. }
  371. void exprAttrib() {
  372. consume(TK("@id"));
  373. ctx()->s_expr.push(
  374. make_expr<AttribExpr>(ctx()->s_expr.popx(), prev().str())
  375. );
  376. }
  377. void exprSubscr() {
  378. auto e = make_expr<SubscrExpr>();
  379. e->a = ctx()->s_expr.popx();
  380. auto slice = make_expr<SliceExpr>();
  381. bool is_slice = false;
  382. // a[<0> <state:1> : state<3> : state<5>]
  383. int state = 0;
  384. do{
  385. switch(state){
  386. case 0:
  387. if(match(TK(":"))){
  388. is_slice=true;
  389. state=2;
  390. break;
  391. }
  392. if(match(TK("]"))) SyntaxError();
  393. EXPR_TUPLE();
  394. slice->start = ctx()->s_expr.popx();
  395. state=1;
  396. break;
  397. case 1:
  398. if(match(TK(":"))){
  399. is_slice=true;
  400. state=2;
  401. break;
  402. }
  403. if(match(TK("]"))) goto __SUBSCR_END;
  404. SyntaxError("expected ':' or ']'");
  405. break;
  406. case 2:
  407. if(match(TK(":"))){
  408. state=4;
  409. break;
  410. }
  411. if(match(TK("]"))) goto __SUBSCR_END;
  412. EXPR_TUPLE();
  413. slice->stop = ctx()->s_expr.popx();
  414. state=3;
  415. break;
  416. case 3:
  417. if(match(TK(":"))){
  418. state=4;
  419. break;
  420. }
  421. if(match(TK("]"))) goto __SUBSCR_END;
  422. SyntaxError("expected ':' or ']'");
  423. break;
  424. case 4:
  425. if(match(TK("]"))) goto __SUBSCR_END;
  426. EXPR_TUPLE();
  427. slice->step = ctx()->s_expr.popx();
  428. state=5;
  429. break;
  430. case 5: consume(TK("]")); goto __SUBSCR_END;
  431. }
  432. }while(true);
  433. __SUBSCR_END:
  434. if(is_slice){
  435. e->b = std::move(slice);
  436. }else{
  437. if(state != 1) FATAL_ERROR();
  438. e->b = std::move(slice->start);
  439. }
  440. ctx()->s_expr.push(std::move(e));
  441. }
  442. void exprLiteral0() {
  443. ctx()->s_expr.push(make_expr<Literal0Expr>(prev().type));
  444. }
  445. void compile_block_body() {
  446. consume(TK(":"));
  447. if(curr().type!=TK("@eol") && curr().type!=TK("@eof")){
  448. compile_stmt(); // inline block
  449. return;
  450. }
  451. if(!match_newlines(mode()==REPL_MODE)){
  452. SyntaxError("expected a new line after ':'");
  453. }
  454. consume(TK("@indent"));
  455. while (curr().type != TK("@dedent")) {
  456. match_newlines();
  457. compile_stmt();
  458. match_newlines();
  459. }
  460. consume(TK("@dedent"));
  461. }
  462. Str _compile_import() {
  463. if(name_scope() != NAME_GLOBAL) SyntaxError("import statement should be used in global scope");
  464. Opcode op = OP_IMPORT_NAME;
  465. if(match(TK("."))) op = OP_IMPORT_NAME_REL;
  466. consume(TK("@id"));
  467. Str name = prev().str();
  468. ctx()->emit(op, StrName(name).index, prev().line);
  469. return name;
  470. }
  471. // import a as b
  472. void compile_normal_import() {
  473. do {
  474. Str name = _compile_import();
  475. if (match(TK("as"))) {
  476. consume(TK("@id"));
  477. name = prev().str();
  478. }
  479. ctx()->emit(OP_STORE_GLOBAL, StrName(name).index, prev().line);
  480. } while (match(TK(",")));
  481. consume_end_stmt();
  482. }
  483. // from a import b as c, d as e
  484. void compile_from_import() {
  485. _compile_import();
  486. consume(TK("import"));
  487. if (match(TK("*"))) {
  488. ctx()->emit(OP_IMPORT_STAR, BC_NOARG, prev().line);
  489. consume_end_stmt();
  490. return;
  491. }
  492. do {
  493. ctx()->emit(OP_DUP_TOP, BC_NOARG, BC_KEEPLINE);
  494. consume(TK("@id"));
  495. Str name = prev().str();
  496. ctx()->emit(OP_LOAD_ATTR, StrName(name).index, prev().line);
  497. if (match(TK("as"))) {
  498. consume(TK("@id"));
  499. name = prev().str();
  500. }
  501. ctx()->emit(OP_STORE_GLOBAL, StrName(name).index, prev().line);
  502. } while (match(TK(",")));
  503. ctx()->emit(OP_POP_TOP, BC_NOARG, BC_KEEPLINE);
  504. consume_end_stmt();
  505. }
  506. void parse_expression(int precedence, bool push_stack=true) {
  507. advance();
  508. PrattCallback prefix = rules[prev().type].prefix;
  509. if (prefix == nullptr) SyntaxError(Str("expected an expression, but got ") + TK_STR(prev().type));
  510. (this->*prefix)();
  511. while (rules[curr().type].precedence >= precedence) {
  512. TokenIndex op = curr().type;
  513. advance();
  514. PrattCallback infix = rules[op].infix;
  515. if(infix == nullptr) throw std::runtime_error("(infix == nullptr) is true");
  516. (this->*infix)();
  517. }
  518. if(!push_stack) ctx()->emit_expr();
  519. }
  520. void compile_if_stmt() {
  521. EXPR(false); // condition
  522. int patch = ctx()->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, prev().line);
  523. compile_block_body();
  524. if (match(TK("elif"))) {
  525. int exit_patch = ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, prev().line);
  526. ctx()->patch_jump(patch);
  527. compile_if_stmt();
  528. ctx()->patch_jump(exit_patch);
  529. } else if (match(TK("else"))) {
  530. int exit_patch = ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, prev().line);
  531. ctx()->patch_jump(patch);
  532. compile_block_body();
  533. ctx()->patch_jump(exit_patch);
  534. } else {
  535. ctx()->patch_jump(patch);
  536. }
  537. }
  538. void compile_while_loop() {
  539. ctx()->enter_block(WHILE_LOOP);
  540. EXPR(false); // condition
  541. int patch = ctx()->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, prev().line);
  542. compile_block_body();
  543. ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, BC_KEEPLINE);
  544. ctx()->patch_jump(patch);
  545. ctx()->exit_block();
  546. }
  547. void compile_for_loop() {
  548. Expr_ vars = EXPR_VARS();
  549. consume(TK("in"));
  550. EXPR_TUPLE(false);
  551. ctx()->emit(OP_GET_ITER, BC_NOARG, BC_KEEPLINE);
  552. ctx()->enter_block(FOR_LOOP);
  553. ctx()->emit(OP_FOR_ITER, BC_NOARG, BC_KEEPLINE);
  554. bool ok = vars->emit_store(ctx());
  555. if(!ok) SyntaxError(); // this error occurs in `vars` instead of this line, but...nevermind
  556. compile_block_body();
  557. ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, BC_KEEPLINE);
  558. ctx()->exit_block();
  559. }
  560. void compile_try_except() {
  561. ctx()->enter_block(TRY_EXCEPT);
  562. compile_block_body();
  563. std::vector<int> patches = {
  564. ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, BC_KEEPLINE)
  565. };
  566. ctx()->exit_block();
  567. do {
  568. consume(TK("except"));
  569. if(match(TK("@id"))){
  570. ctx()->emit(OP_EXCEPTION_MATCH, StrName(prev().str()).index, prev().line);
  571. }else{
  572. ctx()->emit(OP_LOAD_TRUE, BC_NOARG, BC_KEEPLINE);
  573. }
  574. int patch = ctx()->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, BC_KEEPLINE);
  575. // pop the exception on match
  576. ctx()->emit(OP_POP_TOP, BC_NOARG, BC_KEEPLINE);
  577. compile_block_body();
  578. patches.push_back(ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, BC_KEEPLINE));
  579. ctx()->patch_jump(patch);
  580. }while(curr().type == TK("except"));
  581. // no match, re-raise
  582. ctx()->emit(OP_RE_RAISE, BC_NOARG, BC_KEEPLINE);
  583. for (int patch : patches) ctx()->patch_jump(patch);
  584. }
  585. void compile_decorated(){
  586. std::vector<Expr_> decorators;
  587. do{
  588. EXPR();
  589. decorators.push_back(ctx()->s_expr.popx());
  590. if(!match_newlines_repl()) SyntaxError();
  591. }while(match(TK("@")));
  592. consume(TK("def"));
  593. compile_function(decorators);
  594. }
  595. bool try_compile_assignment(){
  596. Expr* lhs_p = ctx()->s_expr.top().get();
  597. bool inplace;
  598. switch (curr().type) {
  599. case TK("+="): case TK("-="): case TK("*="): case TK("/="): case TK("//="): case TK("%="):
  600. case TK("<<="): case TK(">>="): case TK("&="): case TK("|="): case TK("^="): {
  601. if(ctx()->is_compiling_class) SyntaxError();
  602. inplace = true;
  603. advance();
  604. auto e = make_expr<BinaryExpr>();
  605. e->op = prev().type - 1; // -1 to remove =
  606. e->lhs = ctx()->s_expr.popx();
  607. EXPR_TUPLE();
  608. e->rhs = ctx()->s_expr.popx();
  609. ctx()->s_expr.push(std::move(e));
  610. } break;
  611. case TK("="):
  612. inplace = false;
  613. advance();
  614. EXPR_TUPLE();
  615. break;
  616. default: return false;
  617. }
  618. // std::cout << ctx()->_log_s_expr() << std::endl;
  619. Expr_ rhs = ctx()->s_expr.popx();
  620. if(lhs_p->is_starred() || rhs->is_starred()){
  621. SyntaxError("can't use starred expression here");
  622. }
  623. rhs->emit(ctx());
  624. bool ok = lhs_p->emit_store(ctx());
  625. if(!ok) SyntaxError();
  626. if(!inplace) ctx()->s_expr.pop();
  627. return true;
  628. }
  629. void compile_stmt() {
  630. advance();
  631. int kw_line = prev().line; // backup line number
  632. switch(prev().type){
  633. case TK("break"):
  634. if (!ctx()->is_curr_block_loop()) SyntaxError("'break' outside loop");
  635. ctx()->emit(OP_LOOP_BREAK, BC_NOARG, kw_line);
  636. consume_end_stmt();
  637. break;
  638. case TK("continue"):
  639. if (!ctx()->is_curr_block_loop()) SyntaxError("'continue' not properly in loop");
  640. ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, kw_line);
  641. consume_end_stmt();
  642. break;
  643. case TK("yield"):
  644. if (contexts.size() <= 1) SyntaxError("'yield' outside function");
  645. EXPR_TUPLE(false);
  646. // if yield present, mark the function as generator
  647. ctx()->co->is_generator = true;
  648. ctx()->emit(OP_YIELD_VALUE, BC_NOARG, kw_line);
  649. consume_end_stmt();
  650. break;
  651. case TK("yield from"):
  652. if (contexts.size() <= 1) SyntaxError("'yield from' outside function");
  653. EXPR_TUPLE(false);
  654. // if yield from present, mark the function as generator
  655. ctx()->co->is_generator = true;
  656. ctx()->emit(OP_GET_ITER, BC_NOARG, kw_line);
  657. ctx()->enter_block(FOR_LOOP);
  658. ctx()->emit(OP_FOR_ITER, BC_NOARG, BC_KEEPLINE);
  659. ctx()->emit(OP_YIELD_VALUE, BC_NOARG, BC_KEEPLINE);
  660. ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, BC_KEEPLINE);
  661. ctx()->exit_block();
  662. consume_end_stmt();
  663. break;
  664. case TK("return"):
  665. if (contexts.size() <= 1) SyntaxError("'return' outside function");
  666. if(match_end_stmt()){
  667. ctx()->emit(OP_LOAD_NONE, BC_NOARG, kw_line);
  668. }else{
  669. EXPR_TUPLE(false);
  670. consume_end_stmt();
  671. }
  672. ctx()->emit(OP_RETURN_VALUE, BC_NOARG, kw_line);
  673. break;
  674. /*************************************************/
  675. case TK("if"): compile_if_stmt(); break;
  676. case TK("while"): compile_while_loop(); break;
  677. case TK("for"): compile_for_loop(); break;
  678. case TK("import"): compile_normal_import(); break;
  679. case TK("from"): compile_from_import(); break;
  680. case TK("def"): compile_function(); break;
  681. case TK("@"): compile_decorated(); break;
  682. case TK("try"): compile_try_except(); break;
  683. case TK("pass"): consume_end_stmt(); break;
  684. /*************************************************/
  685. case TK("++"):{
  686. consume(TK("@id"));
  687. StrName name(prev().sv());
  688. switch(name_scope()){
  689. case NAME_LOCAL:
  690. ctx()->emit(OP_INC_FAST, ctx()->add_varname(name), prev().line);
  691. break;
  692. case NAME_GLOBAL:
  693. ctx()->emit(OP_INC_GLOBAL, name.index, prev().line);
  694. break;
  695. default: SyntaxError(); break;
  696. }
  697. consume_end_stmt();
  698. break;
  699. }
  700. case TK("--"):{
  701. consume(TK("@id"));
  702. StrName name(prev().sv());
  703. switch(name_scope()){
  704. case NAME_LOCAL:
  705. ctx()->emit(OP_DEC_FAST, ctx()->add_varname(name), prev().line);
  706. break;
  707. case NAME_GLOBAL:
  708. ctx()->emit(OP_DEC_GLOBAL, name.index, prev().line);
  709. break;
  710. default: SyntaxError(); break;
  711. }
  712. consume_end_stmt();
  713. break;
  714. }
  715. case TK("assert"):
  716. EXPR_TUPLE(false);
  717. ctx()->emit(OP_ASSERT, BC_NOARG, kw_line);
  718. consume_end_stmt();
  719. break;
  720. case TK("global"):
  721. do {
  722. consume(TK("@id"));
  723. ctx()->co->global_names.insert(prev().str());
  724. } while (match(TK(",")));
  725. consume_end_stmt();
  726. break;
  727. case TK("raise"): {
  728. consume(TK("@id"));
  729. int dummy_t = StrName(prev().str()).index;
  730. if(match(TK("(")) && !match(TK(")"))){
  731. EXPR(false); consume(TK(")"));
  732. }else{
  733. ctx()->emit(OP_LOAD_NONE, BC_NOARG, BC_KEEPLINE);
  734. }
  735. ctx()->emit(OP_RAISE, dummy_t, kw_line);
  736. consume_end_stmt();
  737. } break;
  738. case TK("del"): {
  739. EXPR_TUPLE();
  740. Expr_ e = ctx()->s_expr.popx();
  741. bool ok = e->emit_del(ctx());
  742. if(!ok) SyntaxError();
  743. consume_end_stmt();
  744. } break;
  745. case TK("with"): {
  746. EXPR(false);
  747. consume(TK("as"));
  748. consume(TK("@id"));
  749. StrName name(prev().str());
  750. ctx()->emit(OP_STORE_NAME, name.index, prev().line);
  751. ctx()->emit(OP_LOAD_NAME, name.index, prev().line);
  752. ctx()->emit(OP_WITH_ENTER, BC_NOARG, prev().line);
  753. compile_block_body();
  754. ctx()->emit(OP_LOAD_NAME, name.index, prev().line);
  755. ctx()->emit(OP_WITH_EXIT, BC_NOARG, prev().line);
  756. } break;
  757. /*************************************************/
  758. case TK("$label"): {
  759. if(mode()!=EXEC_MODE) SyntaxError("'label' is only available in EXEC_MODE");
  760. consume(TK("@id"));
  761. bool ok = ctx()->add_label(prev().str());
  762. if(!ok) SyntaxError("label " + prev().str().escape() + " already exists");
  763. consume_end_stmt();
  764. } break;
  765. case TK("$goto"):
  766. if(mode()!=EXEC_MODE) SyntaxError("'goto' is only available in EXEC_MODE");
  767. consume(TK("@id"));
  768. ctx()->emit(OP_GOTO, StrName(prev().str()).index, prev().line);
  769. consume_end_stmt();
  770. break;
  771. /*************************************************/
  772. // handle dangling expression or assignment
  773. default: {
  774. advance(-1); // do revert since we have pre-called advance() at the beginning
  775. EXPR_TUPLE();
  776. // eat variable's type hint
  777. if(match(TK(":"))) consume_type_hints();
  778. if(!try_compile_assignment()){
  779. if(!ctx()->s_expr.empty() && ctx()->s_expr.top()->is_starred()){
  780. SyntaxError();
  781. }
  782. ctx()->emit_expr();
  783. if((mode()==CELL_MODE || mode()==REPL_MODE) && name_scope()==NAME_GLOBAL){
  784. ctx()->emit(OP_PRINT_EXPR, BC_NOARG, BC_KEEPLINE);
  785. }else{
  786. ctx()->emit(OP_POP_TOP, BC_NOARG, BC_KEEPLINE);
  787. }
  788. }
  789. consume_end_stmt();
  790. }
  791. }
  792. }
  793. void consume_type_hints(){
  794. EXPR();
  795. ctx()->s_expr.pop();
  796. }
  797. void compile_class(){
  798. consume(TK("@id"));
  799. int namei = StrName(prev().str()).index;
  800. int super_namei = -1;
  801. if(match(TK("(")) && match(TK("@id"))){
  802. super_namei = StrName(prev().str()).index;
  803. consume(TK(")"));
  804. }
  805. if(super_namei == -1) ctx()->emit(OP_LOAD_NONE, BC_NOARG, prev().line);
  806. else ctx()->emit(OP_LOAD_GLOBAL, super_namei, prev().line);
  807. ctx()->emit(OP_BEGIN_CLASS, namei, BC_KEEPLINE);
  808. ctx()->is_compiling_class = true;
  809. compile_block_body();
  810. ctx()->is_compiling_class = false;
  811. ctx()->emit(OP_END_CLASS, BC_NOARG, BC_KEEPLINE);
  812. }
  813. void _compile_f_args(FuncDecl_ decl, bool enable_type_hints){
  814. int state = 0; // 0 for args, 1 for *args, 2 for k=v, 3 for **kwargs
  815. do {
  816. if(state == 3) SyntaxError("**kwargs should be the last argument");
  817. match_newlines();
  818. if(match(TK("*"))){
  819. if(state < 1) state = 1;
  820. else SyntaxError("*args should be placed before **kwargs");
  821. }
  822. else if(match(TK("**"))){
  823. state = 3;
  824. }
  825. consume(TK("@id"));
  826. StrName name = prev().str();
  827. // check duplicate argument name
  828. for(int i: decl->args){
  829. if(decl->code->varnames[i] == name) {
  830. SyntaxError("duplicate argument name");
  831. }
  832. }
  833. if(decl->starred_arg!=-1 && decl->code->varnames[decl->starred_arg] == name){
  834. SyntaxError("duplicate argument name");
  835. }
  836. for(auto& kv: decl->kwargs){
  837. if(decl->code->varnames[kv.key] == name){
  838. SyntaxError("duplicate argument name");
  839. }
  840. }
  841. // eat type hints
  842. if(enable_type_hints && match(TK(":"))) consume_type_hints();
  843. if(state == 0 && curr().type == TK("=")) state = 2;
  844. int index = ctx()->add_varname(name);
  845. switch (state)
  846. {
  847. case 0:
  848. decl->args.push_back(index);
  849. break;
  850. case 1:
  851. decl->starred_arg = index;
  852. state+=1;
  853. break;
  854. case 2: {
  855. consume(TK("="));
  856. PyObject* value = read_literal();
  857. if(value == nullptr){
  858. SyntaxError(Str("expect a literal, not ") + TK_STR(curr().type));
  859. }
  860. decl->kwargs.push_back(FuncDecl::KwArg{index, value});
  861. } break;
  862. case 3: SyntaxError("**kwargs is not supported yet"); break;
  863. }
  864. } while (match(TK(",")));
  865. }
  866. void compile_function(const std::vector<Expr_>& decorators={}){
  867. Str decl_name;
  868. consume(TK("@id"));
  869. decl_name = prev().str();
  870. FuncDecl_ decl = push_f_context(decl_name);
  871. consume(TK("("));
  872. if (!match(TK(")"))) {
  873. _compile_f_args(decl, true);
  874. consume(TK(")"));
  875. }
  876. if(match(TK("->"))) consume_type_hints();
  877. compile_block_body();
  878. pop_context();
  879. PyObject* docstring = nullptr;
  880. if(decl->code->codes.size()>=2 && decl->code->codes[0].op == OP_LOAD_CONST && decl->code->codes[1].op == OP_POP_TOP){
  881. PyObject* c = decl->code->consts[decl->code->codes[0].arg];
  882. if(is_type(c, vm->tp_str)){
  883. decl->code->codes[0].op = OP_NO_OP;
  884. decl->code->codes[1].op = OP_NO_OP;
  885. docstring = c;
  886. }
  887. }
  888. ctx()->emit(OP_LOAD_FUNCTION, ctx()->add_func_decl(decl), prev().line);
  889. if(docstring != nullptr){
  890. ctx()->emit(OP_SETUP_DOCSTRING, ctx()->add_const(docstring), prev().line);
  891. }
  892. // add decorators
  893. for(auto it=decorators.rbegin(); it!=decorators.rend(); ++it){
  894. (*it)->emit(ctx());
  895. ctx()->emit(OP_ROT_TWO, BC_NOARG, (*it)->line);
  896. ctx()->emit(OP_LOAD_NULL, BC_NOARG, BC_KEEPLINE);
  897. ctx()->emit(OP_ROT_TWO, BC_NOARG, BC_KEEPLINE);
  898. ctx()->emit(OP_CALL, 1, (*it)->line);
  899. }
  900. if(!ctx()->is_compiling_class){
  901. auto e = make_expr<NameExpr>(decl_name, name_scope());
  902. e->emit_store(ctx());
  903. }else{
  904. int index = StrName(decl_name).index;
  905. ctx()->emit(OP_STORE_CLASS_ATTR, index, prev().line);
  906. }
  907. }
  908. PyObject* to_object(const TokenValue& value){
  909. PyObject* obj = nullptr;
  910. if(std::holds_alternative<i64>(value)){
  911. obj = VAR(std::get<i64>(value));
  912. }
  913. if(std::holds_alternative<f64>(value)){
  914. obj = VAR(std::get<f64>(value));
  915. }
  916. if(std::holds_alternative<Str>(value)){
  917. obj = VAR(std::get<Str>(value));
  918. }
  919. if(obj == nullptr) FATAL_ERROR();
  920. return obj;
  921. }
  922. PyObject* read_literal(){
  923. advance();
  924. switch(prev().type){
  925. case TK("-"): {
  926. consume(TK("@num"));
  927. PyObject* val = to_object(prev().value);
  928. return vm->py_negate(val);
  929. }
  930. case TK("@num"): return to_object(prev().value);
  931. case TK("@str"): return to_object(prev().value);
  932. case TK("True"): return VAR(true);
  933. case TK("False"): return VAR(false);
  934. case TK("None"): return vm->None;
  935. case TK("..."): return vm->Ellipsis;
  936. default: break;
  937. }
  938. return nullptr;
  939. }
  940. void SyntaxError(Str msg){ lexer->throw_err("SyntaxError", msg, err().line, err().start); }
  941. void SyntaxError(){ lexer->throw_err("SyntaxError", "invalid syntax", err().line, err().start); }
  942. void IndentationError(Str msg){ lexer->throw_err("IndentationError", msg, err().line, err().start); }
  943. public:
  944. Compiler(VM* vm, const Str& source, const Str& filename, CompileMode mode, bool unknown_global_scope=false){
  945. this->vm = vm;
  946. this->used = false;
  947. this->unknown_global_scope = unknown_global_scope;
  948. this->lexer = std::make_unique<Lexer>(
  949. make_sp<SourceData>(source, filename, mode)
  950. );
  951. init_pratt_rules();
  952. }
  953. CodeObject_ compile(){
  954. if(used) FATAL_ERROR();
  955. used = true;
  956. tokens = lexer->run();
  957. // if(lexer->src->filename == "<stdin>"){
  958. // for(auto& t: tokens) std::cout << t.info() << std::endl;
  959. // }
  960. CodeObject_ code = push_global_context();
  961. advance(); // skip @sof, so prev() is always valid
  962. match_newlines(); // skip possible leading '\n'
  963. if(mode()==EVAL_MODE) {
  964. EXPR_TUPLE(false);
  965. consume(TK("@eof"));
  966. ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  967. pop_context();
  968. return code;
  969. }else if(mode()==JSON_MODE){
  970. EXPR();
  971. Expr_ e = ctx()->s_expr.popx();
  972. if(!e->is_json_object()) SyntaxError("expect a JSON object, literal or array");
  973. consume(TK("@eof"));
  974. e->emit(ctx());
  975. ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  976. pop_context();
  977. return code;
  978. }
  979. while (!match(TK("@eof"))) {
  980. if (match(TK("class"))) {
  981. compile_class();
  982. } else {
  983. compile_stmt();
  984. }
  985. match_newlines();
  986. }
  987. pop_context();
  988. return code;
  989. }
  990. };
  991. } // namespace pkpy