compiler.h 37 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039
  1. #pragma once
  2. #include "codeobject.h"
  3. #include "common.h"
  4. #include "expr.h"
  5. #include "obj.h"
  6. namespace pkpy{
  7. class Compiler;
  8. typedef void (Compiler::*PrattCallback)();
  9. struct PrattRule{
  10. PrattCallback prefix;
  11. PrattCallback infix;
  12. Precedence precedence;
  13. };
  14. class Compiler {
  15. inline static PrattRule rules[kTokenCount];
  16. std::unique_ptr<Lexer> lexer;
  17. stack<CodeEmitContext> contexts;
  18. VM* vm;
  19. bool unknown_global_scope; // for eval/exec() call
  20. bool used;
  21. // for parsing token stream
  22. int i = 0;
  23. std::vector<Token> tokens;
  24. const Token& prev() const{ return tokens.at(i-1); }
  25. const Token& curr() const{ return tokens.at(i); }
  26. const Token& next() const{ return tokens.at(i+1); }
  27. const Token& err() const{
  28. if(i >= tokens.size()) return prev();
  29. return curr();
  30. }
  31. void advance(int delta=1) { i += delta; }
  32. CodeEmitContext* ctx() { return &contexts.top(); }
  33. CompileMode mode() const{ return lexer->src->mode; }
  34. NameScope name_scope() const {
  35. auto s = contexts.size()>1 ? NAME_LOCAL : NAME_GLOBAL;
  36. if(unknown_global_scope && s == NAME_GLOBAL) s = NAME_GLOBAL_UNKNOWN;
  37. return s;
  38. }
  39. CodeObject_ push_global_context(){
  40. CodeObject_ co = make_sp<CodeObject>(lexer->src, lexer->src->filename);
  41. contexts.push(CodeEmitContext(vm, co, contexts.size()));
  42. return co;
  43. }
  44. FuncDecl_ push_f_context(Str name){
  45. FuncDecl_ decl = make_sp<FuncDecl>();
  46. decl->code = make_sp<CodeObject>(lexer->src, name);
  47. decl->nested = name_scope() == NAME_LOCAL;
  48. contexts.push(CodeEmitContext(vm, decl->code, contexts.size()));
  49. return decl;
  50. }
  51. void pop_context(){
  52. if(!ctx()->s_expr.empty()){
  53. throw std::runtime_error("!ctx()->s_expr.empty()\n" + ctx()->_log_s_expr());
  54. }
  55. // if the last op does not return, add a default return None
  56. if(ctx()->co->codes.empty() || ctx()->co->codes.back().op != OP_RETURN_VALUE){
  57. ctx()->emit(OP_LOAD_NONE, BC_NOARG, BC_KEEPLINE);
  58. ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  59. }
  60. ctx()->co->optimize(vm);
  61. contexts.pop();
  62. }
  63. static void init_pratt_rules(){
  64. if(rules[TK(".")].precedence != PREC_NONE) return;
  65. // http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
  66. #define METHOD(name) &Compiler::name
  67. #define NO_INFIX nullptr, PREC_NONE
  68. for(TokenIndex i=0; i<kTokenCount; i++) rules[i] = { nullptr, NO_INFIX };
  69. rules[TK(".")] = { nullptr, METHOD(exprAttrib), PREC_ATTRIB };
  70. rules[TK("(")] = { METHOD(exprGroup), METHOD(exprCall), PREC_CALL };
  71. rules[TK("[")] = { METHOD(exprList), METHOD(exprSubscr), PREC_SUBSCRIPT };
  72. rules[TK("{")] = { METHOD(exprMap), NO_INFIX };
  73. rules[TK("%")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  74. rules[TK("+")] = { nullptr, METHOD(exprBinaryOp), PREC_TERM };
  75. rules[TK("-")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_TERM };
  76. rules[TK("*")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_FACTOR };
  77. rules[TK("/")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  78. rules[TK("//")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  79. rules[TK("**")] = { nullptr, METHOD(exprBinaryOp), PREC_EXPONENT };
  80. rules[TK(">")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  81. rules[TK("<")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  82. rules[TK("==")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY };
  83. rules[TK("!=")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY };
  84. rules[TK(">=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  85. rules[TK("<=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  86. rules[TK("in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  87. rules[TK("is")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  88. rules[TK("<<")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  89. rules[TK(">>")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  90. rules[TK("&")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_AND };
  91. rules[TK("|")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_OR };
  92. rules[TK("^")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_XOR };
  93. rules[TK("?")] = { nullptr, METHOD(exprTernary), PREC_TERNARY };
  94. rules[TK("if")] = { nullptr, METHOD(exprTernary), PREC_TERNARY };
  95. rules[TK(",")] = { nullptr, METHOD(exprTuple), PREC_TUPLE };
  96. rules[TK("not in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  97. rules[TK("is not")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  98. rules[TK("and") ] = { nullptr, METHOD(exprAnd), PREC_LOGICAL_AND };
  99. rules[TK("or")] = { nullptr, METHOD(exprOr), PREC_LOGICAL_OR };
  100. rules[TK("not")] = { METHOD(exprNot), nullptr, PREC_LOGICAL_NOT };
  101. rules[TK("True")] = { METHOD(exprLiteral0), NO_INFIX };
  102. rules[TK("False")] = { METHOD(exprLiteral0), NO_INFIX };
  103. rules[TK("None")] = { METHOD(exprLiteral0), NO_INFIX };
  104. rules[TK("...")] = { METHOD(exprLiteral0), NO_INFIX };
  105. rules[TK("lambda")] = { METHOD(exprLambda), NO_INFIX };
  106. rules[TK("@id")] = { METHOD(exprName), NO_INFIX };
  107. rules[TK("@num")] = { METHOD(exprLiteral), NO_INFIX };
  108. rules[TK("@str")] = { METHOD(exprLiteral), NO_INFIX };
  109. rules[TK("@fstr")] = { METHOD(exprFString), NO_INFIX };
  110. #undef METHOD
  111. #undef NO_INFIX
  112. }
  113. bool match(TokenIndex expected) {
  114. if (curr().type != expected) return false;
  115. advance();
  116. return true;
  117. }
  118. void consume(TokenIndex expected) {
  119. if (!match(expected)){
  120. SyntaxError(
  121. fmt("expected '", TK_STR(expected), "', but got '", TK_STR(curr().type), "'")
  122. );
  123. }
  124. }
  125. bool match_newlines_repl(){
  126. return match_newlines(mode()==REPL_MODE);
  127. }
  128. bool match_newlines(bool repl_throw=false) {
  129. bool consumed = false;
  130. if (curr().type == TK("@eol")) {
  131. while (curr().type == TK("@eol")) advance();
  132. consumed = true;
  133. }
  134. if (repl_throw && curr().type == TK("@eof")){
  135. throw NeedMoreLines(ctx()->is_compiling_class);
  136. }
  137. return consumed;
  138. }
  139. bool match_end_stmt() {
  140. if (match(TK(";"))) { match_newlines(); return true; }
  141. if (match_newlines() || curr().type == TK("@eof")) return true;
  142. if (curr().type == TK("@dedent")) return true;
  143. return false;
  144. }
  145. void consume_end_stmt() {
  146. if (!match_end_stmt()) SyntaxError("expected statement end");
  147. }
  148. /*************************************************/
  149. void EXPR(bool push_stack=true) {
  150. parse_expression(PREC_TUPLE+1, push_stack);
  151. }
  152. void EXPR_TUPLE(bool push_stack=true) {
  153. parse_expression(PREC_TUPLE, push_stack);
  154. }
  155. // special case for `for loop` and `comp`
  156. Expr_ EXPR_VARS(){
  157. std::vector<Expr_> items;
  158. do {
  159. consume(TK("@id"));
  160. items.push_back(make_expr<NameExpr>(prev().str(), name_scope()));
  161. } while(match(TK(",")));
  162. if(items.size()==1) return std::move(items[0]);
  163. return make_expr<TupleExpr>(std::move(items));
  164. }
  165. template <typename T, typename... Args>
  166. std::unique_ptr<T> make_expr(Args&&... args) {
  167. std::unique_ptr<T> expr = std::make_unique<T>(std::forward<Args>(args)...);
  168. expr->line = prev().line;
  169. return expr;
  170. }
  171. void exprLiteral(){
  172. ctx()->s_expr.push(make_expr<LiteralExpr>(prev().value));
  173. }
  174. void exprFString(){
  175. ctx()->s_expr.push(make_expr<FStringExpr>(std::get<Str>(prev().value)));
  176. }
  177. void exprLambda(){
  178. FuncDecl_ decl = push_f_context("<lambda>");
  179. auto e = make_expr<LambdaExpr>(decl);
  180. if(!match(TK(":"))){
  181. _compile_f_args(e->decl, false);
  182. consume(TK(":"));
  183. }
  184. // https://github.com/blueloveTH/pocketpy/issues/37
  185. parse_expression(PREC_LAMBDA + 1, false);
  186. ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  187. pop_context();
  188. ctx()->s_expr.push(std::move(e));
  189. }
  190. void exprTuple(){
  191. std::vector<Expr_> items;
  192. items.push_back(ctx()->s_expr.popx());
  193. do {
  194. EXPR(); // NOTE: "1," will fail, "1,2" will be ok
  195. items.push_back(ctx()->s_expr.popx());
  196. } while(match(TK(",")));
  197. ctx()->s_expr.push(make_expr<TupleExpr>(
  198. std::move(items)
  199. ));
  200. }
  201. void exprOr(){
  202. auto e = make_expr<OrExpr>();
  203. e->lhs = ctx()->s_expr.popx();
  204. parse_expression(PREC_LOGICAL_OR + 1);
  205. e->rhs = ctx()->s_expr.popx();
  206. ctx()->s_expr.push(std::move(e));
  207. }
  208. void exprAnd(){
  209. auto e = make_expr<AndExpr>();
  210. e->lhs = ctx()->s_expr.popx();
  211. parse_expression(PREC_LOGICAL_AND + 1);
  212. e->rhs = ctx()->s_expr.popx();
  213. ctx()->s_expr.push(std::move(e));
  214. }
  215. void exprTernary(){
  216. auto e = make_expr<TernaryExpr>();
  217. if(prev().type == TK("if")){
  218. e->true_expr = ctx()->s_expr.popx();
  219. // cond
  220. parse_expression(PREC_TERNARY + 1);
  221. e->cond = ctx()->s_expr.popx();
  222. consume(TK("else"));
  223. // if false
  224. parse_expression(PREC_TERNARY + 1);
  225. e->false_expr = ctx()->s_expr.popx();
  226. }else{ // ?:
  227. e->cond = ctx()->s_expr.popx();
  228. // if true
  229. parse_expression(PREC_TERNARY + 1);
  230. e->true_expr = ctx()->s_expr.popx();
  231. consume(TK(":"));
  232. // if false
  233. parse_expression(PREC_TERNARY + 1);
  234. e->false_expr = ctx()->s_expr.popx();
  235. }
  236. ctx()->s_expr.push(std::move(e));
  237. }
  238. void exprBinaryOp(){
  239. auto e = make_expr<BinaryExpr>();
  240. e->op = prev().type;
  241. e->lhs = ctx()->s_expr.popx();
  242. parse_expression(rules[e->op].precedence + 1);
  243. e->rhs = ctx()->s_expr.popx();
  244. ctx()->s_expr.push(std::move(e));
  245. }
  246. void exprNot() {
  247. parse_expression(PREC_LOGICAL_NOT + 1);
  248. ctx()->s_expr.push(make_expr<NotExpr>(ctx()->s_expr.popx()));
  249. }
  250. void exprUnaryOp(){
  251. TokenIndex op = prev().type;
  252. parse_expression(PREC_UNARY + 1);
  253. switch(op){
  254. case TK("-"):
  255. ctx()->s_expr.push(make_expr<NegatedExpr>(ctx()->s_expr.popx()));
  256. break;
  257. case TK("*"):
  258. ctx()->s_expr.push(make_expr<StarredExpr>(ctx()->s_expr.popx()));
  259. break;
  260. default: FATAL_ERROR();
  261. }
  262. }
  263. void exprGroup(){
  264. match_newlines_repl();
  265. EXPR_TUPLE(); // () is just for change precedence
  266. match_newlines_repl();
  267. consume(TK(")"));
  268. }
  269. template<typename T>
  270. void _consume_comp(Expr_ expr){
  271. static_assert(std::is_base_of<CompExpr, T>::value);
  272. std::unique_ptr<CompExpr> ce = make_expr<T>();
  273. ce->expr = std::move(expr);
  274. ce->vars = EXPR_VARS();
  275. consume(TK("in"));
  276. parse_expression(PREC_TERNARY + 1);
  277. ce->iter = ctx()->s_expr.popx();
  278. match_newlines_repl();
  279. if(match(TK("if"))){
  280. parse_expression(PREC_TERNARY + 1);
  281. ce->cond = ctx()->s_expr.popx();
  282. }
  283. ctx()->s_expr.push(std::move(ce));
  284. match_newlines_repl();
  285. }
  286. void exprList() {
  287. int line = prev().line;
  288. std::vector<Expr_> items;
  289. do {
  290. match_newlines_repl();
  291. if (curr().type == TK("]")) break;
  292. EXPR();
  293. items.push_back(ctx()->s_expr.popx());
  294. match_newlines_repl();
  295. if(items.size()==1 && match(TK("for"))){
  296. _consume_comp<ListCompExpr>(std::move(items[0]));
  297. consume(TK("]"));
  298. return;
  299. }
  300. match_newlines_repl();
  301. } while (match(TK(",")));
  302. consume(TK("]"));
  303. auto e = make_expr<ListExpr>(std::move(items));
  304. e->line = line; // override line
  305. ctx()->s_expr.push(std::move(e));
  306. }
  307. void exprMap() {
  308. bool parsing_dict = false; // {...} may be dict or set
  309. std::vector<Expr_> items;
  310. do {
  311. match_newlines_repl();
  312. if (curr().type == TK("}")) break;
  313. EXPR();
  314. if(curr().type == TK(":")) parsing_dict = true;
  315. if(parsing_dict){
  316. consume(TK(":"));
  317. EXPR();
  318. auto dict_item = make_expr<DictItemExpr>();
  319. dict_item->key = ctx()->s_expr.popx();
  320. dict_item->value = ctx()->s_expr.popx();
  321. items.push_back(std::move(dict_item));
  322. }else{
  323. items.push_back(ctx()->s_expr.popx());
  324. }
  325. match_newlines_repl();
  326. if(items.size()==1 && match(TK("for"))){
  327. if(parsing_dict) _consume_comp<DictCompExpr>(std::move(items[0]));
  328. else _consume_comp<SetCompExpr>(std::move(items[0]));
  329. consume(TK("}"));
  330. return;
  331. }
  332. match_newlines_repl();
  333. } while (match(TK(",")));
  334. consume(TK("}"));
  335. if(items.size()==0 || parsing_dict){
  336. auto e = make_expr<DictExpr>(std::move(items));
  337. ctx()->s_expr.push(std::move(e));
  338. }else{
  339. auto e = make_expr<SetExpr>(std::move(items));
  340. ctx()->s_expr.push(std::move(e));
  341. }
  342. }
  343. void exprCall() {
  344. auto e = make_expr<CallExpr>();
  345. e->callable = ctx()->s_expr.popx();
  346. do {
  347. match_newlines_repl();
  348. if (curr().type==TK(")")) break;
  349. if(curr().type==TK("@id") && next().type==TK("=")) {
  350. consume(TK("@id"));
  351. Str key = prev().str();
  352. consume(TK("="));
  353. EXPR();
  354. e->kwargs.push_back({key, ctx()->s_expr.popx()});
  355. } else{
  356. if(!e->kwargs.empty()) SyntaxError("positional argument follows keyword argument");
  357. EXPR();
  358. e->args.push_back(ctx()->s_expr.popx());
  359. }
  360. match_newlines_repl();
  361. } while (match(TK(",")));
  362. consume(TK(")"));
  363. if(e->args.size() > 32767) SyntaxError("too many positional arguments");
  364. if(e->kwargs.size() > 32767) SyntaxError("too many keyword arguments");
  365. ctx()->s_expr.push(std::move(e));
  366. }
  367. void exprName(){
  368. Str name = prev().str();
  369. NameScope scope = name_scope();
  370. if(ctx()->co->global_names.count(name)){
  371. scope = NAME_GLOBAL;
  372. }
  373. ctx()->s_expr.push(make_expr<NameExpr>(name, scope));
  374. }
  375. void exprAttrib() {
  376. consume(TK("@id"));
  377. ctx()->s_expr.push(
  378. make_expr<AttribExpr>(ctx()->s_expr.popx(), prev().str())
  379. );
  380. }
  381. void exprSubscr() {
  382. auto e = make_expr<SubscrExpr>();
  383. e->a = ctx()->s_expr.popx();
  384. auto slice = make_expr<SliceExpr>();
  385. bool is_slice = false;
  386. // a[<0> <state:1> : state<3> : state<5>]
  387. int state = 0;
  388. do{
  389. switch(state){
  390. case 0:
  391. if(match(TK(":"))){
  392. is_slice=true;
  393. state=2;
  394. break;
  395. }
  396. if(match(TK("]"))) SyntaxError();
  397. EXPR_TUPLE();
  398. slice->start = ctx()->s_expr.popx();
  399. state=1;
  400. break;
  401. case 1:
  402. if(match(TK(":"))){
  403. is_slice=true;
  404. state=2;
  405. break;
  406. }
  407. if(match(TK("]"))) goto __SUBSCR_END;
  408. SyntaxError("expected ':' or ']'");
  409. break;
  410. case 2:
  411. if(match(TK(":"))){
  412. state=4;
  413. break;
  414. }
  415. if(match(TK("]"))) goto __SUBSCR_END;
  416. EXPR_TUPLE();
  417. slice->stop = ctx()->s_expr.popx();
  418. state=3;
  419. break;
  420. case 3:
  421. if(match(TK(":"))){
  422. state=4;
  423. break;
  424. }
  425. if(match(TK("]"))) goto __SUBSCR_END;
  426. SyntaxError("expected ':' or ']'");
  427. break;
  428. case 4:
  429. if(match(TK("]"))) goto __SUBSCR_END;
  430. EXPR_TUPLE();
  431. slice->step = ctx()->s_expr.popx();
  432. state=5;
  433. break;
  434. case 5: consume(TK("]")); goto __SUBSCR_END;
  435. }
  436. }while(true);
  437. __SUBSCR_END:
  438. if(is_slice){
  439. e->b = std::move(slice);
  440. }else{
  441. if(state != 1) FATAL_ERROR();
  442. e->b = std::move(slice->start);
  443. }
  444. ctx()->s_expr.push(std::move(e));
  445. }
  446. void exprLiteral0() {
  447. ctx()->s_expr.push(make_expr<Literal0Expr>(prev().type));
  448. }
  449. void compile_block_body() {
  450. consume(TK(":"));
  451. if(curr().type!=TK("@eol") && curr().type!=TK("@eof")){
  452. compile_stmt(); // inline block
  453. return;
  454. }
  455. if(!match_newlines(mode()==REPL_MODE)){
  456. SyntaxError("expected a new line after ':'");
  457. }
  458. consume(TK("@indent"));
  459. while (curr().type != TK("@dedent")) {
  460. match_newlines();
  461. compile_stmt();
  462. match_newlines();
  463. }
  464. consume(TK("@dedent"));
  465. }
  466. Str _compile_import() {
  467. if(name_scope() != NAME_GLOBAL) SyntaxError("import statement should be used in global scope");
  468. consume(TK("@id"));
  469. Str name = prev().str();
  470. ctx()->emit(OP_IMPORT_NAME, StrName(name).index, prev().line);
  471. return name;
  472. }
  473. // import a as b
  474. void compile_normal_import() {
  475. do {
  476. Str name = _compile_import();
  477. if (match(TK("as"))) {
  478. consume(TK("@id"));
  479. name = prev().str();
  480. }
  481. ctx()->emit(OP_STORE_GLOBAL, StrName(name).index, prev().line);
  482. } while (match(TK(",")));
  483. consume_end_stmt();
  484. }
  485. // from a import b as c, d as e
  486. void compile_from_import() {
  487. _compile_import();
  488. consume(TK("import"));
  489. if (match(TK("*"))) {
  490. ctx()->emit(OP_IMPORT_STAR, BC_NOARG, prev().line);
  491. consume_end_stmt();
  492. return;
  493. }
  494. do {
  495. ctx()->emit(OP_DUP_TOP, BC_NOARG, BC_KEEPLINE);
  496. consume(TK("@id"));
  497. Str name = prev().str();
  498. ctx()->emit(OP_LOAD_ATTR, StrName(name).index, prev().line);
  499. if (match(TK("as"))) {
  500. consume(TK("@id"));
  501. name = prev().str();
  502. }
  503. ctx()->emit(OP_STORE_GLOBAL, StrName(name).index, prev().line);
  504. } while (match(TK(",")));
  505. ctx()->emit(OP_POP_TOP, BC_NOARG, BC_KEEPLINE);
  506. consume_end_stmt();
  507. }
  508. void parse_expression(int precedence, bool push_stack=true) {
  509. advance();
  510. PrattCallback prefix = rules[prev().type].prefix;
  511. if (prefix == nullptr) SyntaxError(Str("expected an expression, but got ") + TK_STR(prev().type));
  512. (this->*prefix)();
  513. while (rules[curr().type].precedence >= precedence) {
  514. TokenIndex op = curr().type;
  515. advance();
  516. PrattCallback infix = rules[op].infix;
  517. if(infix == nullptr) throw std::runtime_error("(infix == nullptr) is true");
  518. (this->*infix)();
  519. }
  520. if(!push_stack) ctx()->emit_expr();
  521. }
  522. void compile_if_stmt() {
  523. EXPR(false); // condition
  524. int patch = ctx()->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, prev().line);
  525. compile_block_body();
  526. if (match(TK("elif"))) {
  527. int exit_patch = ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, prev().line);
  528. ctx()->patch_jump(patch);
  529. compile_if_stmt();
  530. ctx()->patch_jump(exit_patch);
  531. } else if (match(TK("else"))) {
  532. int exit_patch = ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, prev().line);
  533. ctx()->patch_jump(patch);
  534. compile_block_body();
  535. ctx()->patch_jump(exit_patch);
  536. } else {
  537. ctx()->patch_jump(patch);
  538. }
  539. }
  540. void compile_while_loop() {
  541. ctx()->enter_block(WHILE_LOOP);
  542. EXPR(false); // condition
  543. int patch = ctx()->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, prev().line);
  544. compile_block_body();
  545. ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, BC_KEEPLINE);
  546. ctx()->patch_jump(patch);
  547. ctx()->exit_block();
  548. }
  549. void compile_for_loop() {
  550. Expr_ vars = EXPR_VARS();
  551. consume(TK("in"));
  552. EXPR(false);
  553. ctx()->emit(OP_GET_ITER, BC_NOARG, BC_KEEPLINE);
  554. ctx()->enter_block(FOR_LOOP);
  555. ctx()->emit(OP_FOR_ITER, BC_NOARG, BC_KEEPLINE);
  556. bool ok = vars->emit_store(ctx());
  557. if(!ok) SyntaxError(); // this error occurs in `vars` instead of this line, but...nevermind
  558. compile_block_body();
  559. ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, BC_KEEPLINE);
  560. ctx()->exit_block();
  561. }
  562. void compile_try_except() {
  563. ctx()->enter_block(TRY_EXCEPT);
  564. compile_block_body();
  565. std::vector<int> patches = {
  566. ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, BC_KEEPLINE)
  567. };
  568. ctx()->exit_block();
  569. do {
  570. consume(TK("except"));
  571. if(match(TK("@id"))){
  572. ctx()->emit(OP_EXCEPTION_MATCH, StrName(prev().str()).index, prev().line);
  573. }else{
  574. ctx()->emit(OP_LOAD_TRUE, BC_NOARG, BC_KEEPLINE);
  575. }
  576. int patch = ctx()->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, BC_KEEPLINE);
  577. // pop the exception on match
  578. ctx()->emit(OP_POP_TOP, BC_NOARG, BC_KEEPLINE);
  579. compile_block_body();
  580. patches.push_back(ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, BC_KEEPLINE));
  581. ctx()->patch_jump(patch);
  582. }while(curr().type == TK("except"));
  583. // no match, re-raise
  584. ctx()->emit(OP_RE_RAISE, BC_NOARG, BC_KEEPLINE);
  585. for (int patch : patches) ctx()->patch_jump(patch);
  586. }
  587. void compile_decorated(){
  588. std::vector<Expr_> decorators;
  589. do{
  590. EXPR();
  591. decorators.push_back(ctx()->s_expr.popx());
  592. if(!match_newlines_repl()) SyntaxError();
  593. }while(match(TK("@")));
  594. consume(TK("def"));
  595. compile_function(decorators);
  596. }
  597. bool try_compile_assignment(){
  598. Expr* lhs_p = ctx()->s_expr.top().get();
  599. bool inplace;
  600. switch (curr().type) {
  601. case TK("+="): case TK("-="): case TK("*="): case TK("/="): case TK("//="): case TK("%="):
  602. case TK("<<="): case TK(">>="): case TK("&="): case TK("|="): case TK("^="): {
  603. if(ctx()->is_compiling_class) SyntaxError();
  604. inplace = true;
  605. advance();
  606. auto e = make_expr<BinaryExpr>();
  607. e->op = prev().type - 1; // -1 to remove =
  608. e->lhs = ctx()->s_expr.popx();
  609. EXPR_TUPLE();
  610. e->rhs = ctx()->s_expr.popx();
  611. ctx()->s_expr.push(std::move(e));
  612. } break;
  613. case TK("="):
  614. inplace = false;
  615. advance();
  616. EXPR_TUPLE();
  617. break;
  618. default: return false;
  619. }
  620. // std::cout << ctx()->_log_s_expr() << std::endl;
  621. Expr_ rhs = ctx()->s_expr.popx();
  622. if(lhs_p->is_starred() || rhs->is_starred()){
  623. SyntaxError("can't use starred expression here");
  624. }
  625. rhs->emit(ctx());
  626. bool ok = lhs_p->emit_store(ctx());
  627. if(!ok) SyntaxError();
  628. if(!inplace) ctx()->s_expr.pop();
  629. return true;
  630. }
  631. void compile_stmt() {
  632. advance();
  633. int kw_line = prev().line; // backup line number
  634. switch(prev().type){
  635. case TK("break"):
  636. if (!ctx()->is_curr_block_loop()) SyntaxError("'break' outside loop");
  637. ctx()->emit(OP_LOOP_BREAK, BC_NOARG, kw_line);
  638. consume_end_stmt();
  639. break;
  640. case TK("continue"):
  641. if (!ctx()->is_curr_block_loop()) SyntaxError("'continue' not properly in loop");
  642. ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, kw_line);
  643. consume_end_stmt();
  644. break;
  645. case TK("yield"):
  646. if (contexts.size() <= 1) SyntaxError("'yield' outside function");
  647. EXPR_TUPLE(false);
  648. // if yield present, mark the function as generator
  649. ctx()->co->is_generator = true;
  650. ctx()->emit(OP_YIELD_VALUE, BC_NOARG, kw_line);
  651. consume_end_stmt();
  652. break;
  653. case TK("return"):
  654. if (contexts.size() <= 1) SyntaxError("'return' outside function");
  655. if(match_end_stmt()){
  656. ctx()->emit(OP_LOAD_NONE, BC_NOARG, kw_line);
  657. }else{
  658. EXPR_TUPLE(false);
  659. consume_end_stmt();
  660. }
  661. ctx()->emit(OP_RETURN_VALUE, BC_NOARG, kw_line);
  662. break;
  663. /*************************************************/
  664. case TK("if"): compile_if_stmt(); break;
  665. case TK("while"): compile_while_loop(); break;
  666. case TK("for"): compile_for_loop(); break;
  667. case TK("import"): compile_normal_import(); break;
  668. case TK("from"): compile_from_import(); break;
  669. case TK("def"): compile_function(); break;
  670. case TK("@"): compile_decorated(); break;
  671. case TK("try"): compile_try_except(); break;
  672. case TK("pass"): consume_end_stmt(); break;
  673. /*************************************************/
  674. case TK("assert"):
  675. EXPR_TUPLE(false);
  676. ctx()->emit(OP_ASSERT, BC_NOARG, kw_line);
  677. consume_end_stmt();
  678. break;
  679. case TK("global"):
  680. do {
  681. consume(TK("@id"));
  682. ctx()->co->global_names.insert(prev().str());
  683. } while (match(TK(",")));
  684. consume_end_stmt();
  685. break;
  686. case TK("raise"): {
  687. consume(TK("@id"));
  688. int dummy_t = StrName(prev().str()).index;
  689. if(match(TK("(")) && !match(TK(")"))){
  690. EXPR(false); consume(TK(")"));
  691. }else{
  692. ctx()->emit(OP_LOAD_NONE, BC_NOARG, BC_KEEPLINE);
  693. }
  694. ctx()->emit(OP_RAISE, dummy_t, kw_line);
  695. consume_end_stmt();
  696. } break;
  697. case TK("del"): {
  698. EXPR_TUPLE();
  699. Expr_ e = ctx()->s_expr.popx();
  700. bool ok = e->emit_del(ctx());
  701. if(!ok) SyntaxError();
  702. consume_end_stmt();
  703. } break;
  704. case TK("with"): {
  705. // TODO: reimpl this
  706. EXPR(false);
  707. ctx()->emit(OP_POP_TOP, BC_NOARG, prev().line);
  708. consume(TK("as"));
  709. consume(TK("@id"));
  710. // emit(OP_STORE_NAME, index);
  711. // emit(OP_LOAD_NAME_REF, index);
  712. // emit(OP_WITH_ENTER);
  713. compile_block_body();
  714. // emit(OP_LOAD_NAME_REF, index);
  715. // emit(OP_WITH_EXIT);
  716. } break;
  717. /*************************************************/
  718. // TODO: refactor goto/label use special $ syntax
  719. case TK("label"): {
  720. if(mode()!=EXEC_MODE) SyntaxError("'label' is only available in EXEC_MODE");
  721. consume(TK(".")); consume(TK("@id"));
  722. bool ok = ctx()->add_label(prev().str());
  723. if(!ok) SyntaxError("label " + prev().str().escape() + " already exists");
  724. consume_end_stmt();
  725. } break;
  726. case TK("goto"):
  727. if(mode()!=EXEC_MODE) SyntaxError("'goto' is only available in EXEC_MODE");
  728. consume(TK(".")); consume(TK("@id"));
  729. ctx()->emit(OP_GOTO, StrName(prev().str()).index, prev().line);
  730. consume_end_stmt();
  731. break;
  732. /*************************************************/
  733. // handle dangling expression or assignment
  734. default: {
  735. advance(-1); // do revert since we have pre-called advance() at the beginning
  736. EXPR_TUPLE();
  737. if(!try_compile_assignment()){
  738. ctx()->emit_expr();
  739. if(mode()==REPL_MODE && name_scope()==NAME_GLOBAL){
  740. ctx()->emit(OP_PRINT_EXPR, BC_NOARG, BC_KEEPLINE);
  741. }else{
  742. ctx()->emit(OP_POP_TOP, BC_NOARG, BC_KEEPLINE);
  743. }
  744. }
  745. consume_end_stmt();
  746. }
  747. }
  748. }
  749. void compile_class(){
  750. consume(TK("@id"));
  751. int namei = StrName(prev().str()).index;
  752. int super_namei = -1;
  753. if(match(TK("(")) && match(TK("@id"))){
  754. super_namei = StrName(prev().str()).index;
  755. consume(TK(")"));
  756. }
  757. if(super_namei == -1) ctx()->emit(OP_LOAD_NONE, BC_NOARG, prev().line);
  758. else ctx()->emit(OP_LOAD_GLOBAL, super_namei, prev().line);
  759. ctx()->emit(OP_BEGIN_CLASS, namei, BC_KEEPLINE);
  760. ctx()->is_compiling_class = true;
  761. compile_block_body();
  762. ctx()->is_compiling_class = false;
  763. ctx()->emit(OP_END_CLASS, BC_NOARG, BC_KEEPLINE);
  764. }
  765. void _compile_f_args(FuncDecl_ decl, bool enable_type_hints){
  766. int state = 0; // 0 for args, 1 for *args, 2 for k=v, 3 for **kwargs
  767. do {
  768. if(state == 3) SyntaxError("**kwargs should be the last argument");
  769. match_newlines();
  770. if(match(TK("*"))){
  771. if(state < 1) state = 1;
  772. else SyntaxError("*args should be placed before **kwargs");
  773. }
  774. else if(match(TK("**"))){
  775. state = 3;
  776. }
  777. consume(TK("@id"));
  778. StrName name = prev().str();
  779. // check duplicate argument name
  780. for(int i: decl->args){
  781. if(decl->code->varnames[i] == name) {
  782. SyntaxError("duplicate argument name");
  783. }
  784. }
  785. if(decl->starred_arg!=-1 && decl->code->varnames[decl->starred_arg] == name){
  786. SyntaxError("duplicate argument name");
  787. }
  788. for(auto& kv: decl->kwargs){
  789. if(decl->code->varnames[kv.key] == name){
  790. SyntaxError("duplicate argument name");
  791. }
  792. }
  793. // eat type hints
  794. if(enable_type_hints && match(TK(":"))) consume(TK("@id"));
  795. if(state == 0 && curr().type == TK("=")) state = 2;
  796. int index = ctx()->add_varname(name);
  797. switch (state)
  798. {
  799. case 0:
  800. decl->args.push_back(index);
  801. break;
  802. case 1:
  803. decl->starred_arg = index;
  804. state+=1;
  805. break;
  806. case 2: {
  807. consume(TK("="));
  808. PyObject* value = read_literal();
  809. if(value == nullptr){
  810. SyntaxError(Str("expect a literal, not ") + TK_STR(curr().type));
  811. }
  812. decl->kwargs.push_back(FuncDecl::KwArg{index, value});
  813. } break;
  814. case 3: SyntaxError("**kwargs is not supported yet"); break;
  815. }
  816. } while (match(TK(",")));
  817. }
  818. void compile_function(const std::vector<Expr_>& decorators={}){
  819. Str obj_name;
  820. Str decl_name;
  821. consume(TK("@id"));
  822. decl_name = prev().str();
  823. if(!ctx()->is_compiling_class && match(TK("::"))){
  824. consume(TK("@id"));
  825. obj_name = decl_name;
  826. decl_name = prev().str();
  827. }
  828. FuncDecl_ decl = push_f_context(decl_name);
  829. consume(TK("("));
  830. if (!match(TK(")"))) {
  831. _compile_f_args(decl, true);
  832. consume(TK(")"));
  833. }
  834. if(match(TK("->"))){
  835. if(!match(TK("None"))) consume(TK("@id"));
  836. }
  837. compile_block_body();
  838. pop_context();
  839. ctx()->emit(OP_LOAD_FUNCTION, ctx()->add_func_decl(decl), prev().line);
  840. // add decorators
  841. for(auto it=decorators.rbegin(); it!=decorators.rend(); ++it){
  842. (*it)->emit(ctx());
  843. ctx()->emit(OP_ROT_TWO, BC_NOARG, (*it)->line);
  844. ctx()->emit(OP_LOAD_NULL, BC_NOARG, BC_KEEPLINE);
  845. ctx()->emit(OP_ROT_TWO, BC_NOARG, BC_KEEPLINE);
  846. ctx()->emit(OP_CALL, 1, (*it)->line);
  847. }
  848. if(!ctx()->is_compiling_class){
  849. if(obj_name.empty()){
  850. auto e = make_expr<NameExpr>(decl_name, name_scope());
  851. e->emit_store(ctx());
  852. } else {
  853. ctx()->emit(OP_LOAD_GLOBAL, StrName(obj_name).index, prev().line);
  854. int index = StrName(decl_name).index;
  855. ctx()->emit(OP_STORE_ATTR, index, prev().line);
  856. }
  857. }else{
  858. int index = StrName(decl_name).index;
  859. ctx()->emit(OP_STORE_CLASS_ATTR, index, prev().line);
  860. }
  861. }
  862. PyObject* to_object(const TokenValue& value){
  863. PyObject* obj = nullptr;
  864. if(std::holds_alternative<i64>(value)){
  865. obj = VAR(std::get<i64>(value));
  866. }
  867. if(std::holds_alternative<f64>(value)){
  868. obj = VAR(std::get<f64>(value));
  869. }
  870. if(std::holds_alternative<Str>(value)){
  871. obj = VAR(std::get<Str>(value));
  872. }
  873. if(obj == nullptr) FATAL_ERROR();
  874. return obj;
  875. }
  876. PyObject* read_literal(){
  877. advance();
  878. switch(prev().type){
  879. case TK("-"): {
  880. consume(TK("@num"));
  881. PyObject* val = to_object(prev().value);
  882. return vm->num_negated(val);
  883. }
  884. case TK("@num"): return to_object(prev().value);
  885. case TK("@str"): return to_object(prev().value);
  886. case TK("True"): return VAR(true);
  887. case TK("False"): return VAR(false);
  888. case TK("None"): return vm->None;
  889. case TK("..."): return vm->Ellipsis;
  890. default: break;
  891. }
  892. return nullptr;
  893. }
  894. void SyntaxError(Str msg){ lexer->throw_err("SyntaxError", msg, err().line, err().start); }
  895. void SyntaxError(){ lexer->throw_err("SyntaxError", "invalid syntax", err().line, err().start); }
  896. void IndentationError(Str msg){ lexer->throw_err("IndentationError", msg, err().line, err().start); }
  897. public:
  898. Compiler(VM* vm, const Str& source, const Str& filename, CompileMode mode, bool unknown_global_scope=false){
  899. this->vm = vm;
  900. this->used = false;
  901. this->unknown_global_scope = unknown_global_scope;
  902. this->lexer = std::make_unique<Lexer>(
  903. make_sp<SourceData>(source, filename, mode)
  904. );
  905. init_pratt_rules();
  906. }
  907. CodeObject_ compile(){
  908. if(used) FATAL_ERROR();
  909. used = true;
  910. tokens = lexer->run();
  911. // if(lexer->src->filename == "<stdin>"){
  912. // for(auto& t: tokens) std::cout << t.info() << std::endl;
  913. // }
  914. CodeObject_ code = push_global_context();
  915. advance(); // skip @sof, so prev() is always valid
  916. match_newlines(); // skip possible leading '\n'
  917. if(mode()==EVAL_MODE) {
  918. EXPR_TUPLE(false);
  919. consume(TK("@eof"));
  920. ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  921. pop_context();
  922. return code;
  923. }else if(mode()==JSON_MODE){
  924. EXPR();
  925. Expr_ e = ctx()->s_expr.popx();
  926. if(!e->is_json_object()) SyntaxError("expect a JSON object, literal or array");
  927. consume(TK("@eof"));
  928. e->emit(ctx());
  929. ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  930. pop_context();
  931. return code;
  932. }
  933. while (!match(TK("@eof"))) {
  934. if (match(TK("class"))) {
  935. compile_class();
  936. } else {
  937. compile_stmt();
  938. }
  939. match_newlines();
  940. }
  941. pop_context();
  942. return code;
  943. }
  944. };
  945. } // namespace pkpy