compiler.h 38 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054
  1. #pragma once
  2. #include "codeobject.h"
  3. #include "common.h"
  4. #include "expr.h"
  5. #include "obj.h"
  6. namespace pkpy{
  7. class Compiler;
  8. typedef void (Compiler::*PrattCallback)();
  9. struct PrattRule{
  10. PrattCallback prefix;
  11. PrattCallback infix;
  12. Precedence precedence;
  13. };
  14. class Compiler {
  15. inline static PrattRule rules[kTokenCount];
  16. std::unique_ptr<Lexer> lexer;
  17. stack<CodeEmitContext> contexts;
  18. VM* vm;
  19. bool unknown_global_scope; // for eval/exec() call
  20. bool used;
  21. // for parsing token stream
  22. int i = 0;
  23. std::vector<Token> tokens;
  24. const Token& prev() const{ return tokens.at(i-1); }
  25. const Token& curr() const{ return tokens.at(i); }
  26. const Token& next() const{ return tokens.at(i+1); }
  27. const Token& err() const{
  28. if(i >= tokens.size()) return prev();
  29. return curr();
  30. }
  31. void advance(int delta=1) { i += delta; }
  32. CodeEmitContext* ctx() { return &contexts.top(); }
  33. CompileMode mode() const{ return lexer->src->mode; }
  34. NameScope name_scope() const {
  35. auto s = contexts.size()>1 ? NAME_LOCAL : NAME_GLOBAL;
  36. if(unknown_global_scope && s == NAME_GLOBAL) s = NAME_GLOBAL_UNKNOWN;
  37. return s;
  38. }
  39. CodeObject_ push_global_context(){
  40. CodeObject_ co = make_sp<CodeObject>(lexer->src, lexer->src->filename);
  41. contexts.push(CodeEmitContext(vm, co, contexts.size()));
  42. return co;
  43. }
  44. FuncDecl_ push_f_context(Str name){
  45. FuncDecl_ decl = make_sp<FuncDecl>();
  46. decl->code = make_sp<CodeObject>(lexer->src, name);
  47. decl->nested = name_scope() == NAME_LOCAL;
  48. contexts.push(CodeEmitContext(vm, decl->code, contexts.size()));
  49. return decl;
  50. }
  51. void pop_context(){
  52. if(!ctx()->s_expr.empty()){
  53. throw std::runtime_error("!ctx()->s_expr.empty()\n" + ctx()->_log_s_expr());
  54. }
  55. // add a `return None` in the end as a guard
  56. // previously, we only do this if the last opcode is not a return
  57. // however, this is buggy...since there may be a jump to the end (out of bound) even if the last opcode is a return
  58. ctx()->emit(OP_LOAD_NONE, BC_NOARG, BC_KEEPLINE);
  59. ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  60. ctx()->co->optimize(vm);
  61. if(ctx()->co->varnames.size() > PK_MAX_CO_VARNAMES){
  62. SyntaxError("maximum number of local variables exceeded");
  63. }
  64. contexts.pop();
  65. }
  66. static void init_pratt_rules(){
  67. if(rules[TK(".")].precedence != PREC_NONE) return;
  68. // http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
  69. #define METHOD(name) &Compiler::name
  70. #define NO_INFIX nullptr, PREC_NONE
  71. for(TokenIndex i=0; i<kTokenCount; i++) rules[i] = { nullptr, NO_INFIX };
  72. rules[TK(".")] = { nullptr, METHOD(exprAttrib), PREC_ATTRIB };
  73. rules[TK("(")] = { METHOD(exprGroup), METHOD(exprCall), PREC_CALL };
  74. rules[TK("[")] = { METHOD(exprList), METHOD(exprSubscr), PREC_SUBSCRIPT };
  75. rules[TK("{")] = { METHOD(exprMap), NO_INFIX };
  76. rules[TK("%")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  77. rules[TK("+")] = { nullptr, METHOD(exprBinaryOp), PREC_TERM };
  78. rules[TK("-")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_TERM };
  79. rules[TK("*")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_FACTOR };
  80. rules[TK("/")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  81. rules[TK("//")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  82. rules[TK("**")] = { nullptr, METHOD(exprBinaryOp), PREC_EXPONENT };
  83. rules[TK(">")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  84. rules[TK("<")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  85. rules[TK("==")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY };
  86. rules[TK("!=")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY };
  87. rules[TK(">=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  88. rules[TK("<=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  89. rules[TK("in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  90. rules[TK("is")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  91. rules[TK("<<")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  92. rules[TK(">>")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  93. rules[TK("&")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_AND };
  94. rules[TK("|")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_OR };
  95. rules[TK("^")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_XOR };
  96. rules[TK("if")] = { nullptr, METHOD(exprTernary), PREC_TERNARY };
  97. rules[TK(",")] = { nullptr, METHOD(exprTuple), PREC_TUPLE };
  98. rules[TK("not in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  99. rules[TK("is not")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  100. rules[TK("and") ] = { nullptr, METHOD(exprAnd), PREC_LOGICAL_AND };
  101. rules[TK("or")] = { nullptr, METHOD(exprOr), PREC_LOGICAL_OR };
  102. rules[TK("not")] = { METHOD(exprNot), nullptr, PREC_LOGICAL_NOT };
  103. rules[TK("True")] = { METHOD(exprLiteral0), NO_INFIX };
  104. rules[TK("False")] = { METHOD(exprLiteral0), NO_INFIX };
  105. rules[TK("None")] = { METHOD(exprLiteral0), NO_INFIX };
  106. rules[TK("...")] = { METHOD(exprLiteral0), NO_INFIX };
  107. rules[TK("lambda")] = { METHOD(exprLambda), NO_INFIX };
  108. rules[TK("@id")] = { METHOD(exprName), NO_INFIX };
  109. rules[TK("@num")] = { METHOD(exprLiteral), NO_INFIX };
  110. rules[TK("@str")] = { METHOD(exprLiteral), NO_INFIX };
  111. rules[TK("@fstr")] = { METHOD(exprFString), NO_INFIX };
  112. #undef METHOD
  113. #undef NO_INFIX
  114. }
  115. bool match(TokenIndex expected) {
  116. if (curr().type != expected) return false;
  117. advance();
  118. return true;
  119. }
  120. void consume(TokenIndex expected) {
  121. if (!match(expected)){
  122. SyntaxError(
  123. fmt("expected '", TK_STR(expected), "', but got '", TK_STR(curr().type), "'")
  124. );
  125. }
  126. }
  127. bool match_newlines_repl(){
  128. return match_newlines(mode()==REPL_MODE);
  129. }
  130. bool match_newlines(bool repl_throw=false) {
  131. bool consumed = false;
  132. if (curr().type == TK("@eol")) {
  133. while (curr().type == TK("@eol")) advance();
  134. consumed = true;
  135. }
  136. if (repl_throw && curr().type == TK("@eof")){
  137. throw NeedMoreLines(ctx()->is_compiling_class);
  138. }
  139. return consumed;
  140. }
  141. bool match_end_stmt() {
  142. if (match(TK(";"))) { match_newlines(); return true; }
  143. if (match_newlines() || curr().type == TK("@eof")) return true;
  144. if (curr().type == TK("@dedent")) return true;
  145. return false;
  146. }
  147. void consume_end_stmt() {
  148. if (!match_end_stmt()) SyntaxError("expected statement end");
  149. }
  150. /*************************************************/
  151. void EXPR(bool push_stack=true) {
  152. parse_expression(PREC_TUPLE+1, push_stack);
  153. }
  154. void EXPR_TUPLE(bool push_stack=true) {
  155. parse_expression(PREC_TUPLE, push_stack);
  156. }
  157. // special case for `for loop` and `comp`
  158. Expr_ EXPR_VARS(){
  159. std::vector<Expr_> items;
  160. do {
  161. consume(TK("@id"));
  162. items.push_back(make_expr<NameExpr>(prev().str(), name_scope()));
  163. } while(match(TK(",")));
  164. if(items.size()==1) return std::move(items[0]);
  165. return make_expr<TupleExpr>(std::move(items));
  166. }
  167. template <typename T, typename... Args>
  168. std::unique_ptr<T> make_expr(Args&&... args) {
  169. std::unique_ptr<T> expr = std::make_unique<T>(std::forward<Args>(args)...);
  170. expr->line = prev().line;
  171. return expr;
  172. }
  173. void exprLiteral(){
  174. ctx()->s_expr.push(make_expr<LiteralExpr>(prev().value));
  175. }
  176. void exprFString(){
  177. ctx()->s_expr.push(make_expr<FStringExpr>(std::get<Str>(prev().value)));
  178. }
  179. void exprLambda(){
  180. FuncDecl_ decl = push_f_context("<lambda>");
  181. auto e = make_expr<LambdaExpr>(decl);
  182. if(!match(TK(":"))){
  183. _compile_f_args(e->decl, false);
  184. consume(TK(":"));
  185. }
  186. // https://github.com/blueloveTH/pocketpy/issues/37
  187. parse_expression(PREC_LAMBDA + 1, false);
  188. ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  189. pop_context();
  190. ctx()->s_expr.push(std::move(e));
  191. }
  192. void exprTuple(){
  193. std::vector<Expr_> items;
  194. items.push_back(ctx()->s_expr.popx());
  195. do {
  196. EXPR(); // NOTE: "1," will fail, "1,2" will be ok
  197. items.push_back(ctx()->s_expr.popx());
  198. } while(match(TK(",")));
  199. ctx()->s_expr.push(make_expr<TupleExpr>(
  200. std::move(items)
  201. ));
  202. }
  203. void exprOr(){
  204. auto e = make_expr<OrExpr>();
  205. e->lhs = ctx()->s_expr.popx();
  206. parse_expression(PREC_LOGICAL_OR + 1);
  207. e->rhs = ctx()->s_expr.popx();
  208. ctx()->s_expr.push(std::move(e));
  209. }
  210. void exprAnd(){
  211. auto e = make_expr<AndExpr>();
  212. e->lhs = ctx()->s_expr.popx();
  213. parse_expression(PREC_LOGICAL_AND + 1);
  214. e->rhs = ctx()->s_expr.popx();
  215. ctx()->s_expr.push(std::move(e));
  216. }
  217. void exprTernary(){
  218. auto e = make_expr<TernaryExpr>();
  219. e->true_expr = ctx()->s_expr.popx();
  220. // cond
  221. parse_expression(PREC_TERNARY + 1);
  222. e->cond = ctx()->s_expr.popx();
  223. consume(TK("else"));
  224. // if false
  225. parse_expression(PREC_TERNARY + 1);
  226. e->false_expr = ctx()->s_expr.popx();
  227. ctx()->s_expr.push(std::move(e));
  228. }
  229. void exprBinaryOp(){
  230. auto e = make_expr<BinaryExpr>();
  231. e->op = prev().type;
  232. e->lhs = ctx()->s_expr.popx();
  233. parse_expression(rules[e->op].precedence + 1);
  234. e->rhs = ctx()->s_expr.popx();
  235. ctx()->s_expr.push(std::move(e));
  236. }
  237. void exprNot() {
  238. parse_expression(PREC_LOGICAL_NOT + 1);
  239. ctx()->s_expr.push(make_expr<NotExpr>(ctx()->s_expr.popx()));
  240. }
  241. void exprUnaryOp(){
  242. TokenIndex op = prev().type;
  243. parse_expression(PREC_UNARY + 1);
  244. switch(op){
  245. case TK("-"):
  246. ctx()->s_expr.push(make_expr<NegatedExpr>(ctx()->s_expr.popx()));
  247. break;
  248. case TK("*"):
  249. ctx()->s_expr.push(make_expr<StarredExpr>(ctx()->s_expr.popx()));
  250. break;
  251. default: FATAL_ERROR();
  252. }
  253. }
  254. void exprGroup(){
  255. match_newlines_repl();
  256. EXPR_TUPLE(); // () is just for change precedence
  257. match_newlines_repl();
  258. consume(TK(")"));
  259. }
  260. template<typename T>
  261. void _consume_comp(Expr_ expr){
  262. static_assert(std::is_base_of<CompExpr, T>::value);
  263. std::unique_ptr<CompExpr> ce = make_expr<T>();
  264. ce->expr = std::move(expr);
  265. ce->vars = EXPR_VARS();
  266. consume(TK("in"));
  267. parse_expression(PREC_TERNARY + 1);
  268. ce->iter = ctx()->s_expr.popx();
  269. match_newlines_repl();
  270. if(match(TK("if"))){
  271. parse_expression(PREC_TERNARY + 1);
  272. ce->cond = ctx()->s_expr.popx();
  273. }
  274. ctx()->s_expr.push(std::move(ce));
  275. match_newlines_repl();
  276. }
  277. void exprList() {
  278. int line = prev().line;
  279. std::vector<Expr_> items;
  280. do {
  281. match_newlines_repl();
  282. if (curr().type == TK("]")) break;
  283. EXPR();
  284. items.push_back(ctx()->s_expr.popx());
  285. match_newlines_repl();
  286. if(items.size()==1 && match(TK("for"))){
  287. _consume_comp<ListCompExpr>(std::move(items[0]));
  288. consume(TK("]"));
  289. return;
  290. }
  291. match_newlines_repl();
  292. } while (match(TK(",")));
  293. consume(TK("]"));
  294. auto e = make_expr<ListExpr>(std::move(items));
  295. e->line = line; // override line
  296. ctx()->s_expr.push(std::move(e));
  297. }
  298. void exprMap() {
  299. bool parsing_dict = false; // {...} may be dict or set
  300. std::vector<Expr_> items;
  301. do {
  302. match_newlines_repl();
  303. if (curr().type == TK("}")) break;
  304. EXPR();
  305. if(curr().type == TK(":")) parsing_dict = true;
  306. if(parsing_dict){
  307. consume(TK(":"));
  308. EXPR();
  309. auto dict_item = make_expr<DictItemExpr>();
  310. dict_item->key = ctx()->s_expr.popx();
  311. dict_item->value = ctx()->s_expr.popx();
  312. items.push_back(std::move(dict_item));
  313. }else{
  314. items.push_back(ctx()->s_expr.popx());
  315. }
  316. match_newlines_repl();
  317. if(items.size()==1 && match(TK("for"))){
  318. if(parsing_dict) _consume_comp<DictCompExpr>(std::move(items[0]));
  319. else _consume_comp<SetCompExpr>(std::move(items[0]));
  320. consume(TK("}"));
  321. return;
  322. }
  323. match_newlines_repl();
  324. } while (match(TK(",")));
  325. consume(TK("}"));
  326. if(items.size()==0 || parsing_dict){
  327. auto e = make_expr<DictExpr>(std::move(items));
  328. ctx()->s_expr.push(std::move(e));
  329. }else{
  330. auto e = make_expr<SetExpr>(std::move(items));
  331. ctx()->s_expr.push(std::move(e));
  332. }
  333. }
  334. void exprCall() {
  335. auto e = make_expr<CallExpr>();
  336. e->callable = ctx()->s_expr.popx();
  337. do {
  338. match_newlines_repl();
  339. if (curr().type==TK(")")) break;
  340. if(curr().type==TK("@id") && next().type==TK("=")) {
  341. consume(TK("@id"));
  342. Str key = prev().str();
  343. consume(TK("="));
  344. EXPR();
  345. e->kwargs.push_back({key, ctx()->s_expr.popx()});
  346. } else{
  347. if(!e->kwargs.empty()) SyntaxError("positional argument follows keyword argument");
  348. EXPR();
  349. e->args.push_back(ctx()->s_expr.popx());
  350. }
  351. match_newlines_repl();
  352. } while (match(TK(",")));
  353. consume(TK(")"));
  354. if(e->args.size() > 32767) SyntaxError("too many positional arguments");
  355. if(e->kwargs.size() > 32767) SyntaxError("too many keyword arguments");
  356. ctx()->s_expr.push(std::move(e));
  357. }
  358. void exprName(){
  359. Str name = prev().str();
  360. NameScope scope = name_scope();
  361. if(ctx()->co->global_names.count(name)){
  362. scope = NAME_GLOBAL;
  363. }
  364. ctx()->s_expr.push(make_expr<NameExpr>(name, scope));
  365. }
  366. void exprAttrib() {
  367. consume(TK("@id"));
  368. ctx()->s_expr.push(
  369. make_expr<AttribExpr>(ctx()->s_expr.popx(), prev().str())
  370. );
  371. }
  372. void exprSubscr() {
  373. auto e = make_expr<SubscrExpr>();
  374. e->a = ctx()->s_expr.popx();
  375. auto slice = make_expr<SliceExpr>();
  376. bool is_slice = false;
  377. // a[<0> <state:1> : state<3> : state<5>]
  378. int state = 0;
  379. do{
  380. switch(state){
  381. case 0:
  382. if(match(TK(":"))){
  383. is_slice=true;
  384. state=2;
  385. break;
  386. }
  387. if(match(TK("]"))) SyntaxError();
  388. EXPR_TUPLE();
  389. slice->start = ctx()->s_expr.popx();
  390. state=1;
  391. break;
  392. case 1:
  393. if(match(TK(":"))){
  394. is_slice=true;
  395. state=2;
  396. break;
  397. }
  398. if(match(TK("]"))) goto __SUBSCR_END;
  399. SyntaxError("expected ':' or ']'");
  400. break;
  401. case 2:
  402. if(match(TK(":"))){
  403. state=4;
  404. break;
  405. }
  406. if(match(TK("]"))) goto __SUBSCR_END;
  407. EXPR_TUPLE();
  408. slice->stop = ctx()->s_expr.popx();
  409. state=3;
  410. break;
  411. case 3:
  412. if(match(TK(":"))){
  413. state=4;
  414. break;
  415. }
  416. if(match(TK("]"))) goto __SUBSCR_END;
  417. SyntaxError("expected ':' or ']'");
  418. break;
  419. case 4:
  420. if(match(TK("]"))) goto __SUBSCR_END;
  421. EXPR_TUPLE();
  422. slice->step = ctx()->s_expr.popx();
  423. state=5;
  424. break;
  425. case 5: consume(TK("]")); goto __SUBSCR_END;
  426. }
  427. }while(true);
  428. __SUBSCR_END:
  429. if(is_slice){
  430. e->b = std::move(slice);
  431. }else{
  432. if(state != 1) FATAL_ERROR();
  433. e->b = std::move(slice->start);
  434. }
  435. ctx()->s_expr.push(std::move(e));
  436. }
  437. void exprLiteral0() {
  438. ctx()->s_expr.push(make_expr<Literal0Expr>(prev().type));
  439. }
  440. void compile_block_body() {
  441. consume(TK(":"));
  442. if(curr().type!=TK("@eol") && curr().type!=TK("@eof")){
  443. compile_stmt(); // inline block
  444. return;
  445. }
  446. if(!match_newlines(mode()==REPL_MODE)){
  447. SyntaxError("expected a new line after ':'");
  448. }
  449. consume(TK("@indent"));
  450. while (curr().type != TK("@dedent")) {
  451. match_newlines();
  452. compile_stmt();
  453. match_newlines();
  454. }
  455. consume(TK("@dedent"));
  456. }
  457. Str _compile_import() {
  458. if(name_scope() != NAME_GLOBAL) SyntaxError("import statement should be used in global scope");
  459. consume(TK("@id"));
  460. Str name = prev().str();
  461. ctx()->emit(OP_IMPORT_NAME, StrName(name).index, prev().line);
  462. return name;
  463. }
  464. // import a as b
  465. void compile_normal_import() {
  466. do {
  467. Str name = _compile_import();
  468. if (match(TK("as"))) {
  469. consume(TK("@id"));
  470. name = prev().str();
  471. }
  472. ctx()->emit(OP_STORE_GLOBAL, StrName(name).index, prev().line);
  473. } while (match(TK(",")));
  474. consume_end_stmt();
  475. }
  476. // from a import b as c, d as e
  477. void compile_from_import() {
  478. _compile_import();
  479. consume(TK("import"));
  480. if (match(TK("*"))) {
  481. ctx()->emit(OP_IMPORT_STAR, BC_NOARG, prev().line);
  482. consume_end_stmt();
  483. return;
  484. }
  485. do {
  486. ctx()->emit(OP_DUP_TOP, BC_NOARG, BC_KEEPLINE);
  487. consume(TK("@id"));
  488. Str name = prev().str();
  489. ctx()->emit(OP_LOAD_ATTR, StrName(name).index, prev().line);
  490. if (match(TK("as"))) {
  491. consume(TK("@id"));
  492. name = prev().str();
  493. }
  494. ctx()->emit(OP_STORE_GLOBAL, StrName(name).index, prev().line);
  495. } while (match(TK(",")));
  496. ctx()->emit(OP_POP_TOP, BC_NOARG, BC_KEEPLINE);
  497. consume_end_stmt();
  498. }
  499. void parse_expression(int precedence, bool push_stack=true) {
  500. advance();
  501. PrattCallback prefix = rules[prev().type].prefix;
  502. if (prefix == nullptr) SyntaxError(Str("expected an expression, but got ") + TK_STR(prev().type));
  503. (this->*prefix)();
  504. while (rules[curr().type].precedence >= precedence) {
  505. TokenIndex op = curr().type;
  506. advance();
  507. PrattCallback infix = rules[op].infix;
  508. if(infix == nullptr) throw std::runtime_error("(infix == nullptr) is true");
  509. (this->*infix)();
  510. }
  511. if(!push_stack) ctx()->emit_expr();
  512. }
  513. void compile_if_stmt() {
  514. EXPR(false); // condition
  515. int patch = ctx()->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, prev().line);
  516. compile_block_body();
  517. if (match(TK("elif"))) {
  518. int exit_patch = ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, prev().line);
  519. ctx()->patch_jump(patch);
  520. compile_if_stmt();
  521. ctx()->patch_jump(exit_patch);
  522. } else if (match(TK("else"))) {
  523. int exit_patch = ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, prev().line);
  524. ctx()->patch_jump(patch);
  525. compile_block_body();
  526. ctx()->patch_jump(exit_patch);
  527. } else {
  528. ctx()->patch_jump(patch);
  529. }
  530. }
  531. void compile_while_loop() {
  532. ctx()->enter_block(WHILE_LOOP);
  533. EXPR(false); // condition
  534. int patch = ctx()->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, prev().line);
  535. compile_block_body();
  536. ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, BC_KEEPLINE);
  537. ctx()->patch_jump(patch);
  538. ctx()->exit_block();
  539. }
  540. void compile_for_loop() {
  541. Expr_ vars = EXPR_VARS();
  542. consume(TK("in"));
  543. EXPR_TUPLE(false);
  544. ctx()->emit(OP_GET_ITER, BC_NOARG, BC_KEEPLINE);
  545. ctx()->enter_block(FOR_LOOP);
  546. ctx()->emit(OP_FOR_ITER, BC_NOARG, BC_KEEPLINE);
  547. bool ok = vars->emit_store(ctx());
  548. if(!ok) SyntaxError(); // this error occurs in `vars` instead of this line, but...nevermind
  549. compile_block_body();
  550. ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, BC_KEEPLINE);
  551. ctx()->exit_block();
  552. }
  553. void compile_try_except() {
  554. ctx()->enter_block(TRY_EXCEPT);
  555. compile_block_body();
  556. std::vector<int> patches = {
  557. ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, BC_KEEPLINE)
  558. };
  559. ctx()->exit_block();
  560. do {
  561. consume(TK("except"));
  562. if(match(TK("@id"))){
  563. ctx()->emit(OP_EXCEPTION_MATCH, StrName(prev().str()).index, prev().line);
  564. }else{
  565. ctx()->emit(OP_LOAD_TRUE, BC_NOARG, BC_KEEPLINE);
  566. }
  567. int patch = ctx()->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, BC_KEEPLINE);
  568. // pop the exception on match
  569. ctx()->emit(OP_POP_TOP, BC_NOARG, BC_KEEPLINE);
  570. compile_block_body();
  571. patches.push_back(ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, BC_KEEPLINE));
  572. ctx()->patch_jump(patch);
  573. }while(curr().type == TK("except"));
  574. // no match, re-raise
  575. ctx()->emit(OP_RE_RAISE, BC_NOARG, BC_KEEPLINE);
  576. for (int patch : patches) ctx()->patch_jump(patch);
  577. }
  578. void compile_decorated(){
  579. std::vector<Expr_> decorators;
  580. do{
  581. EXPR();
  582. decorators.push_back(ctx()->s_expr.popx());
  583. if(!match_newlines_repl()) SyntaxError();
  584. }while(match(TK("@")));
  585. consume(TK("def"));
  586. compile_function(decorators);
  587. }
  588. bool try_compile_assignment(){
  589. Expr* lhs_p = ctx()->s_expr.top().get();
  590. bool inplace;
  591. switch (curr().type) {
  592. case TK("+="): case TK("-="): case TK("*="): case TK("/="): case TK("//="): case TK("%="):
  593. case TK("<<="): case TK(">>="): case TK("&="): case TK("|="): case TK("^="): {
  594. if(ctx()->is_compiling_class) SyntaxError();
  595. inplace = true;
  596. advance();
  597. auto e = make_expr<BinaryExpr>();
  598. e->op = prev().type - 1; // -1 to remove =
  599. e->lhs = ctx()->s_expr.popx();
  600. EXPR_TUPLE();
  601. e->rhs = ctx()->s_expr.popx();
  602. ctx()->s_expr.push(std::move(e));
  603. } break;
  604. case TK("="):
  605. inplace = false;
  606. advance();
  607. EXPR_TUPLE();
  608. break;
  609. default: return false;
  610. }
  611. // std::cout << ctx()->_log_s_expr() << std::endl;
  612. Expr_ rhs = ctx()->s_expr.popx();
  613. if(lhs_p->is_starred() || rhs->is_starred()){
  614. SyntaxError("can't use starred expression here");
  615. }
  616. rhs->emit(ctx());
  617. bool ok = lhs_p->emit_store(ctx());
  618. if(!ok) SyntaxError();
  619. if(!inplace) ctx()->s_expr.pop();
  620. return true;
  621. }
  622. void compile_stmt() {
  623. advance();
  624. int kw_line = prev().line; // backup line number
  625. switch(prev().type){
  626. case TK("break"):
  627. if (!ctx()->is_curr_block_loop()) SyntaxError("'break' outside loop");
  628. ctx()->emit(OP_LOOP_BREAK, BC_NOARG, kw_line);
  629. consume_end_stmt();
  630. break;
  631. case TK("continue"):
  632. if (!ctx()->is_curr_block_loop()) SyntaxError("'continue' not properly in loop");
  633. ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, kw_line);
  634. consume_end_stmt();
  635. break;
  636. case TK("yield"):
  637. if (contexts.size() <= 1) SyntaxError("'yield' outside function");
  638. EXPR_TUPLE(false);
  639. // if yield present, mark the function as generator
  640. ctx()->co->is_generator = true;
  641. ctx()->emit(OP_YIELD_VALUE, BC_NOARG, kw_line);
  642. consume_end_stmt();
  643. break;
  644. case TK("yield from"):
  645. if (contexts.size() <= 1) SyntaxError("'yield from' outside function");
  646. EXPR_TUPLE(false);
  647. // if yield from present, mark the function as generator
  648. ctx()->co->is_generator = true;
  649. ctx()->emit(OP_GET_ITER, BC_NOARG, kw_line);
  650. ctx()->enter_block(FOR_LOOP);
  651. ctx()->emit(OP_FOR_ITER, BC_NOARG, BC_KEEPLINE);
  652. ctx()->emit(OP_YIELD_VALUE, BC_NOARG, BC_KEEPLINE);
  653. ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, BC_KEEPLINE);
  654. ctx()->exit_block();
  655. consume_end_stmt();
  656. break;
  657. case TK("return"):
  658. if (contexts.size() <= 1) SyntaxError("'return' outside function");
  659. if(match_end_stmt()){
  660. ctx()->emit(OP_LOAD_NONE, BC_NOARG, kw_line);
  661. }else{
  662. EXPR_TUPLE(false);
  663. consume_end_stmt();
  664. }
  665. ctx()->emit(OP_RETURN_VALUE, BC_NOARG, kw_line);
  666. break;
  667. /*************************************************/
  668. case TK("if"): compile_if_stmt(); break;
  669. case TK("while"): compile_while_loop(); break;
  670. case TK("for"): compile_for_loop(); break;
  671. case TK("import"): compile_normal_import(); break;
  672. case TK("from"): compile_from_import(); break;
  673. case TK("def"): compile_function(); break;
  674. case TK("@"): compile_decorated(); break;
  675. case TK("try"): compile_try_except(); break;
  676. case TK("pass"): consume_end_stmt(); break;
  677. /*************************************************/
  678. case TK("assert"):
  679. EXPR_TUPLE(false);
  680. ctx()->emit(OP_ASSERT, BC_NOARG, kw_line);
  681. consume_end_stmt();
  682. break;
  683. case TK("global"):
  684. do {
  685. consume(TK("@id"));
  686. ctx()->co->global_names.insert(prev().str());
  687. } while (match(TK(",")));
  688. consume_end_stmt();
  689. break;
  690. case TK("raise"): {
  691. consume(TK("@id"));
  692. int dummy_t = StrName(prev().str()).index;
  693. if(match(TK("(")) && !match(TK(")"))){
  694. EXPR(false); consume(TK(")"));
  695. }else{
  696. ctx()->emit(OP_LOAD_NONE, BC_NOARG, BC_KEEPLINE);
  697. }
  698. ctx()->emit(OP_RAISE, dummy_t, kw_line);
  699. consume_end_stmt();
  700. } break;
  701. case TK("del"): {
  702. EXPR_TUPLE();
  703. Expr_ e = ctx()->s_expr.popx();
  704. bool ok = e->emit_del(ctx());
  705. if(!ok) SyntaxError();
  706. consume_end_stmt();
  707. } break;
  708. case TK("with"): {
  709. EXPR(false);
  710. consume(TK("as"));
  711. consume(TK("@id"));
  712. StrName name(prev().str());
  713. ctx()->emit(OP_STORE_NAME, name.index, prev().line);
  714. ctx()->emit(OP_LOAD_NAME, name.index, prev().line);
  715. ctx()->emit(OP_WITH_ENTER, BC_NOARG, prev().line);
  716. compile_block_body();
  717. ctx()->emit(OP_LOAD_NAME, name.index, prev().line);
  718. ctx()->emit(OP_WITH_EXIT, BC_NOARG, prev().line);
  719. } break;
  720. /*************************************************/
  721. case TK("$label"): {
  722. if(mode()!=EXEC_MODE) SyntaxError("'label' is only available in EXEC_MODE");
  723. consume(TK("@id"));
  724. bool ok = ctx()->add_label(prev().str());
  725. if(!ok) SyntaxError("label " + prev().str().escape() + " already exists");
  726. consume_end_stmt();
  727. } break;
  728. case TK("$goto"):
  729. if(mode()!=EXEC_MODE) SyntaxError("'goto' is only available in EXEC_MODE");
  730. consume(TK("@id"));
  731. ctx()->emit(OP_GOTO, StrName(prev().str()).index, prev().line);
  732. consume_end_stmt();
  733. break;
  734. /*************************************************/
  735. // handle dangling expression or assignment
  736. default: {
  737. advance(-1); // do revert since we have pre-called advance() at the beginning
  738. EXPR_TUPLE();
  739. if(!try_compile_assignment()){
  740. ctx()->emit_expr();
  741. if(mode()==REPL_MODE && name_scope()==NAME_GLOBAL){
  742. ctx()->emit(OP_PRINT_EXPR, BC_NOARG, BC_KEEPLINE);
  743. }else{
  744. ctx()->emit(OP_POP_TOP, BC_NOARG, BC_KEEPLINE);
  745. }
  746. }
  747. consume_end_stmt();
  748. }
  749. }
  750. }
  751. void compile_class(){
  752. consume(TK("@id"));
  753. int namei = StrName(prev().str()).index;
  754. int super_namei = -1;
  755. if(match(TK("(")) && match(TK("@id"))){
  756. super_namei = StrName(prev().str()).index;
  757. consume(TK(")"));
  758. }
  759. if(super_namei == -1) ctx()->emit(OP_LOAD_NONE, BC_NOARG, prev().line);
  760. else ctx()->emit(OP_LOAD_GLOBAL, super_namei, prev().line);
  761. ctx()->emit(OP_BEGIN_CLASS, namei, BC_KEEPLINE);
  762. ctx()->is_compiling_class = true;
  763. compile_block_body();
  764. ctx()->is_compiling_class = false;
  765. ctx()->emit(OP_END_CLASS, BC_NOARG, BC_KEEPLINE);
  766. }
  767. void _compile_f_args(FuncDecl_ decl, bool enable_type_hints){
  768. int state = 0; // 0 for args, 1 for *args, 2 for k=v, 3 for **kwargs
  769. do {
  770. if(state == 3) SyntaxError("**kwargs should be the last argument");
  771. match_newlines();
  772. if(match(TK("*"))){
  773. if(state < 1) state = 1;
  774. else SyntaxError("*args should be placed before **kwargs");
  775. }
  776. else if(match(TK("**"))){
  777. state = 3;
  778. }
  779. consume(TK("@id"));
  780. StrName name = prev().str();
  781. // check duplicate argument name
  782. for(int i: decl->args){
  783. if(decl->code->varnames[i] == name) {
  784. SyntaxError("duplicate argument name");
  785. }
  786. }
  787. if(decl->starred_arg!=-1 && decl->code->varnames[decl->starred_arg] == name){
  788. SyntaxError("duplicate argument name");
  789. }
  790. for(auto& kv: decl->kwargs){
  791. if(decl->code->varnames[kv.key] == name){
  792. SyntaxError("duplicate argument name");
  793. }
  794. }
  795. // eat type hints
  796. if(enable_type_hints && match(TK(":"))) consume(TK("@id"));
  797. if(state == 0 && curr().type == TK("=")) state = 2;
  798. int index = ctx()->add_varname(name);
  799. switch (state)
  800. {
  801. case 0:
  802. decl->args.push_back(index);
  803. break;
  804. case 1:
  805. decl->starred_arg = index;
  806. state+=1;
  807. break;
  808. case 2: {
  809. consume(TK("="));
  810. PyObject* value = read_literal();
  811. if(value == nullptr){
  812. SyntaxError(Str("expect a literal, not ") + TK_STR(curr().type));
  813. }
  814. decl->kwargs.push_back(FuncDecl::KwArg{index, value});
  815. } break;
  816. case 3: SyntaxError("**kwargs is not supported yet"); break;
  817. }
  818. } while (match(TK(",")));
  819. }
  820. void compile_function(const std::vector<Expr_>& decorators={}){
  821. Str obj_name;
  822. Str decl_name;
  823. consume(TK("@id"));
  824. decl_name = prev().str();
  825. if(!ctx()->is_compiling_class && match(TK("@"))){
  826. consume(TK("@id"));
  827. obj_name = decl_name;
  828. decl_name = prev().str();
  829. }
  830. FuncDecl_ decl = push_f_context(decl_name);
  831. consume(TK("("));
  832. if (!match(TK(")"))) {
  833. _compile_f_args(decl, true);
  834. consume(TK(")"));
  835. }
  836. if(match(TK("->"))){
  837. if(!match(TK("None"))) consume(TK("@id"));
  838. }
  839. compile_block_body();
  840. pop_context();
  841. PyObject* docstring = nullptr;
  842. if(decl->code->codes.size()>=2 && decl->code->codes[0].op == OP_LOAD_CONST && decl->code->codes[1].op == OP_POP_TOP){
  843. PyObject* c = decl->code->consts[decl->code->codes[0].arg];
  844. if(is_type(c, vm->tp_str)){
  845. decl->code->codes[0].op = OP_NO_OP;
  846. decl->code->codes[1].op = OP_NO_OP;
  847. docstring = c;
  848. }
  849. }
  850. ctx()->emit(OP_LOAD_FUNCTION, ctx()->add_func_decl(decl), prev().line);
  851. if(docstring != nullptr){
  852. ctx()->emit(OP_SETUP_DOCSTRING, ctx()->add_const(docstring), prev().line);
  853. }
  854. // add decorators
  855. for(auto it=decorators.rbegin(); it!=decorators.rend(); ++it){
  856. (*it)->emit(ctx());
  857. ctx()->emit(OP_ROT_TWO, BC_NOARG, (*it)->line);
  858. ctx()->emit(OP_LOAD_NULL, BC_NOARG, BC_KEEPLINE);
  859. ctx()->emit(OP_ROT_TWO, BC_NOARG, BC_KEEPLINE);
  860. ctx()->emit(OP_CALL, 1, (*it)->line);
  861. }
  862. if(!ctx()->is_compiling_class){
  863. if(obj_name.empty()){
  864. auto e = make_expr<NameExpr>(decl_name, name_scope());
  865. e->emit_store(ctx());
  866. } else {
  867. ctx()->emit(OP_LOAD_GLOBAL, StrName(obj_name).index, prev().line);
  868. int index = StrName(decl_name).index;
  869. ctx()->emit(OP_STORE_ATTR, index, prev().line);
  870. }
  871. }else{
  872. int index = StrName(decl_name).index;
  873. ctx()->emit(OP_STORE_CLASS_ATTR, index, prev().line);
  874. }
  875. }
  876. PyObject* to_object(const TokenValue& value){
  877. PyObject* obj = nullptr;
  878. if(std::holds_alternative<i64>(value)){
  879. obj = VAR(std::get<i64>(value));
  880. }
  881. if(std::holds_alternative<f64>(value)){
  882. obj = VAR(std::get<f64>(value));
  883. }
  884. if(std::holds_alternative<Str>(value)){
  885. obj = VAR(std::get<Str>(value));
  886. }
  887. if(obj == nullptr) FATAL_ERROR();
  888. return obj;
  889. }
  890. PyObject* read_literal(){
  891. advance();
  892. switch(prev().type){
  893. case TK("-"): {
  894. consume(TK("@num"));
  895. PyObject* val = to_object(prev().value);
  896. return vm->num_negated(val);
  897. }
  898. case TK("@num"): return to_object(prev().value);
  899. case TK("@str"): return to_object(prev().value);
  900. case TK("True"): return VAR(true);
  901. case TK("False"): return VAR(false);
  902. case TK("None"): return vm->None;
  903. case TK("..."): return vm->Ellipsis;
  904. default: break;
  905. }
  906. return nullptr;
  907. }
  908. void SyntaxError(Str msg){ lexer->throw_err("SyntaxError", msg, err().line, err().start); }
  909. void SyntaxError(){ lexer->throw_err("SyntaxError", "invalid syntax", err().line, err().start); }
  910. void IndentationError(Str msg){ lexer->throw_err("IndentationError", msg, err().line, err().start); }
  911. public:
  912. Compiler(VM* vm, const Str& source, const Str& filename, CompileMode mode, bool unknown_global_scope=false){
  913. this->vm = vm;
  914. this->used = false;
  915. this->unknown_global_scope = unknown_global_scope;
  916. this->lexer = std::make_unique<Lexer>(
  917. make_sp<SourceData>(source, filename, mode)
  918. );
  919. init_pratt_rules();
  920. }
  921. CodeObject_ compile(){
  922. if(used) FATAL_ERROR();
  923. used = true;
  924. tokens = lexer->run();
  925. // if(lexer->src->filename == "<stdin>"){
  926. // for(auto& t: tokens) std::cout << t.info() << std::endl;
  927. // }
  928. CodeObject_ code = push_global_context();
  929. advance(); // skip @sof, so prev() is always valid
  930. match_newlines(); // skip possible leading '\n'
  931. if(mode()==EVAL_MODE) {
  932. EXPR_TUPLE(false);
  933. consume(TK("@eof"));
  934. ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  935. pop_context();
  936. return code;
  937. }else if(mode()==JSON_MODE){
  938. EXPR();
  939. Expr_ e = ctx()->s_expr.popx();
  940. if(!e->is_json_object()) SyntaxError("expect a JSON object, literal or array");
  941. consume(TK("@eof"));
  942. e->emit(ctx());
  943. ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  944. pop_context();
  945. return code;
  946. }
  947. while (!match(TK("@eof"))) {
  948. if (match(TK("class"))) {
  949. compile_class();
  950. } else {
  951. compile_stmt();
  952. }
  953. match_newlines();
  954. }
  955. pop_context();
  956. return code;
  957. }
  958. };
  959. } // namespace pkpy