compiler.cpp 40 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060
  1. #include "pocketpy/compiler.h"
  2. namespace pkpy{
  3. NameScope Compiler::name_scope() const {
  4. auto s = contexts.size()>1 ? NAME_LOCAL : NAME_GLOBAL;
  5. if(unknown_global_scope && s == NAME_GLOBAL) s = NAME_GLOBAL_UNKNOWN;
  6. return s;
  7. }
  8. CodeObject_ Compiler::push_global_context(){
  9. CodeObject_ co = make_sp<CodeObject>(lexer->src, lexer->src->filename);
  10. contexts.push(CodeEmitContext(vm, co, contexts.size()));
  11. return co;
  12. }
  13. FuncDecl_ Compiler::push_f_context(Str name){
  14. FuncDecl_ decl = make_sp<FuncDecl>();
  15. decl->code = make_sp<CodeObject>(lexer->src, name);
  16. decl->nested = name_scope() == NAME_LOCAL;
  17. contexts.push(CodeEmitContext(vm, decl->code, contexts.size()));
  18. return decl;
  19. }
  20. void Compiler::pop_context(){
  21. if(!ctx()->s_expr.empty()){
  22. throw std::runtime_error("!ctx()->s_expr.empty()\n" + ctx()->_log_s_expr());
  23. }
  24. // add a `return None` in the end as a guard
  25. // previously, we only do this if the last opcode is not a return
  26. // however, this is buggy...since there may be a jump to the end (out of bound) even if the last opcode is a return
  27. ctx()->emit(OP_LOAD_NONE, BC_NOARG, BC_KEEPLINE);
  28. ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  29. // ctx()->co->optimize(vm);
  30. if(ctx()->co->varnames.size() > PK_MAX_CO_VARNAMES){
  31. SyntaxError("maximum number of local variables exceeded");
  32. }
  33. contexts.pop();
  34. }
  35. void Compiler::init_pratt_rules(){
  36. if(rules[TK(".")].precedence != PREC_NONE) return;
  37. // http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
  38. #define METHOD(name) &Compiler::name
  39. #define NO_INFIX nullptr, PREC_NONE
  40. for(TokenIndex i=0; i<kTokenCount; i++) rules[i] = { nullptr, NO_INFIX };
  41. rules[TK(".")] = { nullptr, METHOD(exprAttrib), PREC_ATTRIB };
  42. rules[TK("(")] = { METHOD(exprGroup), METHOD(exprCall), PREC_CALL };
  43. rules[TK("[")] = { METHOD(exprList), METHOD(exprSubscr), PREC_SUBSCRIPT };
  44. rules[TK("{")] = { METHOD(exprMap), NO_INFIX };
  45. rules[TK("%")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  46. rules[TK("+")] = { nullptr, METHOD(exprBinaryOp), PREC_TERM };
  47. rules[TK("-")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_TERM };
  48. rules[TK("*")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_FACTOR };
  49. rules[TK("~")] = { METHOD(exprUnaryOp), nullptr, PREC_UNARY };
  50. rules[TK("/")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  51. rules[TK("//")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  52. rules[TK("**")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_EXPONENT };
  53. rules[TK(">")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  54. rules[TK("<")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  55. rules[TK("==")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  56. rules[TK("!=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  57. rules[TK(">=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  58. rules[TK("<=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  59. rules[TK("in")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  60. rules[TK("is")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  61. rules[TK("<<")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  62. rules[TK(">>")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  63. rules[TK("&")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_AND };
  64. rules[TK("|")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_OR };
  65. rules[TK("^")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_XOR };
  66. rules[TK("@")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  67. rules[TK("if")] = { nullptr, METHOD(exprTernary), PREC_TERNARY };
  68. rules[TK(",")] = { nullptr, METHOD(exprTuple), PREC_TUPLE };
  69. rules[TK("not in")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  70. rules[TK("is not")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  71. rules[TK("and") ] = { nullptr, METHOD(exprAnd), PREC_LOGICAL_AND };
  72. rules[TK("or")] = { nullptr, METHOD(exprOr), PREC_LOGICAL_OR };
  73. rules[TK("not")] = { METHOD(exprNot), nullptr, PREC_LOGICAL_NOT };
  74. rules[TK("True")] = { METHOD(exprLiteral0), NO_INFIX };
  75. rules[TK("False")] = { METHOD(exprLiteral0), NO_INFIX };
  76. rules[TK("None")] = { METHOD(exprLiteral0), NO_INFIX };
  77. rules[TK("...")] = { METHOD(exprLiteral0), NO_INFIX };
  78. rules[TK("lambda")] = { METHOD(exprLambda), NO_INFIX };
  79. rules[TK("@id")] = { METHOD(exprName), NO_INFIX };
  80. rules[TK("@num")] = { METHOD(exprLiteral), NO_INFIX };
  81. rules[TK("@str")] = { METHOD(exprLiteral), NO_INFIX };
  82. rules[TK("@fstr")] = { METHOD(exprFString), NO_INFIX };
  83. rules[TK("@long")] = { METHOD(exprLong), NO_INFIX };
  84. rules[TK("@bytes")] = { METHOD(exprBytes), NO_INFIX };
  85. #undef METHOD
  86. #undef NO_INFIX
  87. }
  88. bool Compiler::match(TokenIndex expected) {
  89. if (curr().type != expected) return false;
  90. advance();
  91. return true;
  92. }
  93. void Compiler::consume(TokenIndex expected) {
  94. if (!match(expected)){
  95. SyntaxError(
  96. fmt("expected '", TK_STR(expected), "', got '", TK_STR(curr().type), "'")
  97. );
  98. }
  99. }
  100. bool Compiler::match_newlines_repl(){
  101. return match_newlines(mode()==REPL_MODE);
  102. }
  103. bool Compiler::match_newlines(bool repl_throw) {
  104. bool consumed = false;
  105. if (curr().type == TK("@eol")) {
  106. while (curr().type == TK("@eol")) advance();
  107. consumed = true;
  108. }
  109. if (repl_throw && curr().type == TK("@eof")){
  110. throw NeedMoreLines(ctx()->is_compiling_class);
  111. }
  112. return consumed;
  113. }
  114. bool Compiler::match_end_stmt() {
  115. if (match(TK(";"))) { match_newlines(); return true; }
  116. if (match_newlines() || curr().type == TK("@eof")) return true;
  117. if (curr().type == TK("@dedent")) return true;
  118. return false;
  119. }
  120. void Compiler::consume_end_stmt() {
  121. if (!match_end_stmt()) SyntaxError("expected statement end");
  122. }
  123. void Compiler::EXPR(bool push_stack) {
  124. parse_expression(PREC_TUPLE+1, push_stack);
  125. }
  126. void Compiler::EXPR_TUPLE(bool push_stack) {
  127. parse_expression(PREC_TUPLE, push_stack);
  128. }
  129. // special case for `for loop` and `comp`
  130. Expr_ Compiler::EXPR_VARS(){
  131. std::vector<Expr_> items;
  132. do {
  133. consume(TK("@id"));
  134. items.push_back(make_expr<NameExpr>(prev().str(), name_scope()));
  135. } while(match(TK(",")));
  136. if(items.size()==1) return std::move(items[0]);
  137. return make_expr<TupleExpr>(std::move(items));
  138. }
  139. void Compiler::exprLiteral(){
  140. ctx()->s_expr.push(make_expr<LiteralExpr>(prev().value));
  141. }
  142. void Compiler::exprLong(){
  143. ctx()->s_expr.push(make_expr<LongExpr>(prev().str()));
  144. }
  145. void Compiler::exprBytes(){
  146. ctx()->s_expr.push(make_expr<BytesExpr>(std::get<Str>(prev().value)));
  147. }
  148. void Compiler::exprFString(){
  149. ctx()->s_expr.push(make_expr<FStringExpr>(std::get<Str>(prev().value)));
  150. }
  151. void Compiler::exprLambda(){
  152. FuncDecl_ decl = push_f_context("<lambda>");
  153. auto e = make_expr<LambdaExpr>(decl);
  154. if(!match(TK(":"))){
  155. _compile_f_args(e->decl, false);
  156. consume(TK(":"));
  157. }
  158. // https://github.com/blueloveTH/pocketpy/issues/37
  159. parse_expression(PREC_LAMBDA + 1, false);
  160. ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  161. pop_context();
  162. ctx()->s_expr.push(std::move(e));
  163. }
  164. void Compiler::exprTuple(){
  165. std::vector<Expr_> items;
  166. items.push_back(ctx()->s_expr.popx());
  167. do {
  168. if(curr().brackets_level) match_newlines_repl();
  169. if(!is_expression()) break;
  170. EXPR();
  171. items.push_back(ctx()->s_expr.popx());
  172. if(curr().brackets_level) match_newlines_repl();
  173. } while(match(TK(",")));
  174. ctx()->s_expr.push(make_expr<TupleExpr>(
  175. std::move(items)
  176. ));
  177. }
  178. void Compiler::exprOr(){
  179. auto e = make_expr<OrExpr>();
  180. e->lhs = ctx()->s_expr.popx();
  181. parse_expression(PREC_LOGICAL_OR + 1);
  182. e->rhs = ctx()->s_expr.popx();
  183. ctx()->s_expr.push(std::move(e));
  184. }
  185. void Compiler::exprAnd(){
  186. auto e = make_expr<AndExpr>();
  187. e->lhs = ctx()->s_expr.popx();
  188. parse_expression(PREC_LOGICAL_AND + 1);
  189. e->rhs = ctx()->s_expr.popx();
  190. ctx()->s_expr.push(std::move(e));
  191. }
  192. void Compiler::exprTernary(){
  193. auto e = make_expr<TernaryExpr>();
  194. e->true_expr = ctx()->s_expr.popx();
  195. // cond
  196. parse_expression(PREC_TERNARY + 1);
  197. e->cond = ctx()->s_expr.popx();
  198. consume(TK("else"));
  199. // if false
  200. parse_expression(PREC_TERNARY + 1);
  201. e->false_expr = ctx()->s_expr.popx();
  202. ctx()->s_expr.push(std::move(e));
  203. }
  204. void Compiler::exprBinaryOp(){
  205. auto e = make_expr<BinaryExpr>();
  206. e->op = prev().type;
  207. e->lhs = ctx()->s_expr.popx();
  208. parse_expression(rules[e->op].precedence + 1);
  209. e->rhs = ctx()->s_expr.popx();
  210. ctx()->s_expr.push(std::move(e));
  211. }
  212. void Compiler::exprNot() {
  213. parse_expression(PREC_LOGICAL_NOT + 1);
  214. ctx()->s_expr.push(make_expr<NotExpr>(ctx()->s_expr.popx()));
  215. }
  216. void Compiler::exprUnaryOp(){
  217. TokenIndex op = prev().type;
  218. parse_expression(PREC_UNARY + 1);
  219. switch(op){
  220. case TK("-"):
  221. ctx()->s_expr.push(make_expr<NegatedExpr>(ctx()->s_expr.popx()));
  222. break;
  223. case TK("~"):
  224. ctx()->s_expr.push(make_expr<InvertExpr>(ctx()->s_expr.popx()));
  225. break;
  226. case TK("*"):
  227. ctx()->s_expr.push(make_expr<StarredExpr>(1, ctx()->s_expr.popx()));
  228. break;
  229. case TK("**"):
  230. ctx()->s_expr.push(make_expr<StarredExpr>(2, ctx()->s_expr.popx()));
  231. break;
  232. default: FATAL_ERROR();
  233. }
  234. }
  235. void Compiler::exprGroup(){
  236. match_newlines_repl();
  237. EXPR_TUPLE(); // () is just for change precedence
  238. match_newlines_repl();
  239. consume(TK(")"));
  240. if(ctx()->s_expr.top()->is_tuple()) return;
  241. Expr_ g = make_expr<GroupedExpr>(ctx()->s_expr.popx());
  242. ctx()->s_expr.push(std::move(g));
  243. }
  244. void Compiler::exprList() {
  245. int line = prev().line;
  246. std::vector<Expr_> items;
  247. do {
  248. match_newlines_repl();
  249. if (curr().type == TK("]")) break;
  250. EXPR();
  251. items.push_back(ctx()->s_expr.popx());
  252. match_newlines_repl();
  253. if(items.size()==1 && match(TK("for"))){
  254. _consume_comp<ListCompExpr>(std::move(items[0]));
  255. consume(TK("]"));
  256. return;
  257. }
  258. match_newlines_repl();
  259. } while (match(TK(",")));
  260. consume(TK("]"));
  261. auto e = make_expr<ListExpr>(std::move(items));
  262. e->line = line; // override line
  263. ctx()->s_expr.push(std::move(e));
  264. }
  265. void Compiler::exprMap() {
  266. bool parsing_dict = false; // {...} may be dict or set
  267. std::vector<Expr_> items;
  268. do {
  269. match_newlines_repl();
  270. if (curr().type == TK("}")) break;
  271. EXPR();
  272. int star_level = ctx()->s_expr.top()->star_level();
  273. if(star_level==2 || curr().type == TK(":")){
  274. parsing_dict = true;
  275. }
  276. if(parsing_dict){
  277. auto dict_item = make_expr<DictItemExpr>();
  278. if(star_level == 2){
  279. dict_item->key = nullptr;
  280. dict_item->value = ctx()->s_expr.popx();
  281. }else{
  282. consume(TK(":"));
  283. EXPR();
  284. dict_item->key = ctx()->s_expr.popx();
  285. dict_item->value = ctx()->s_expr.popx();
  286. }
  287. items.push_back(std::move(dict_item));
  288. }else{
  289. items.push_back(ctx()->s_expr.popx());
  290. }
  291. match_newlines_repl();
  292. if(items.size()==1 && match(TK("for"))){
  293. if(parsing_dict) _consume_comp<DictCompExpr>(std::move(items[0]));
  294. else _consume_comp<SetCompExpr>(std::move(items[0]));
  295. consume(TK("}"));
  296. return;
  297. }
  298. match_newlines_repl();
  299. } while (match(TK(",")));
  300. consume(TK("}"));
  301. if(items.size()==0 || parsing_dict){
  302. auto e = make_expr<DictExpr>(std::move(items));
  303. ctx()->s_expr.push(std::move(e));
  304. }else{
  305. auto e = make_expr<SetExpr>(std::move(items));
  306. ctx()->s_expr.push(std::move(e));
  307. }
  308. }
  309. void Compiler::exprCall() {
  310. auto e = make_expr<CallExpr>();
  311. e->callable = ctx()->s_expr.popx();
  312. do {
  313. match_newlines_repl();
  314. if (curr().type==TK(")")) break;
  315. if(curr().type==TK("@id") && next().type==TK("=")) {
  316. consume(TK("@id"));
  317. Str key = prev().str();
  318. consume(TK("="));
  319. EXPR();
  320. e->kwargs.push_back({key, ctx()->s_expr.popx()});
  321. } else{
  322. EXPR();
  323. if(ctx()->s_expr.top()->star_level() == 2){
  324. // **kwargs
  325. e->kwargs.push_back({"**", ctx()->s_expr.popx()});
  326. }else{
  327. // positional argument
  328. if(!e->kwargs.empty()) SyntaxError("positional argument follows keyword argument");
  329. e->args.push_back(ctx()->s_expr.popx());
  330. }
  331. }
  332. match_newlines_repl();
  333. } while (match(TK(",")));
  334. consume(TK(")"));
  335. if(e->args.size() > 32767) SyntaxError("too many positional arguments");
  336. if(e->kwargs.size() > 32767) SyntaxError("too many keyword arguments");
  337. ctx()->s_expr.push(std::move(e));
  338. }
  339. void Compiler::exprName(){
  340. Str name = prev().str();
  341. NameScope scope = name_scope();
  342. if(ctx()->global_names.count(name)){
  343. scope = NAME_GLOBAL;
  344. }
  345. ctx()->s_expr.push(make_expr<NameExpr>(name, scope));
  346. }
  347. void Compiler::exprAttrib() {
  348. consume(TK("@id"));
  349. ctx()->s_expr.push(
  350. make_expr<AttribExpr>(ctx()->s_expr.popx(), prev().str())
  351. );
  352. }
  353. void Compiler::exprSubscr() {
  354. auto e = make_expr<SubscrExpr>();
  355. e->a = ctx()->s_expr.popx();
  356. auto slice = make_expr<SliceExpr>();
  357. bool is_slice = false;
  358. // a[<0> <state:1> : state<3> : state<5>]
  359. int state = 0;
  360. do{
  361. switch(state){
  362. case 0:
  363. if(match(TK(":"))){
  364. is_slice=true;
  365. state=2;
  366. break;
  367. }
  368. if(match(TK("]"))) SyntaxError();
  369. EXPR_TUPLE();
  370. slice->start = ctx()->s_expr.popx();
  371. state=1;
  372. break;
  373. case 1:
  374. if(match(TK(":"))){
  375. is_slice=true;
  376. state=2;
  377. break;
  378. }
  379. if(match(TK("]"))) goto __SUBSCR_END;
  380. SyntaxError("expected ':' or ']'");
  381. break;
  382. case 2:
  383. if(match(TK(":"))){
  384. state=4;
  385. break;
  386. }
  387. if(match(TK("]"))) goto __SUBSCR_END;
  388. EXPR_TUPLE();
  389. slice->stop = ctx()->s_expr.popx();
  390. state=3;
  391. break;
  392. case 3:
  393. if(match(TK(":"))){
  394. state=4;
  395. break;
  396. }
  397. if(match(TK("]"))) goto __SUBSCR_END;
  398. SyntaxError("expected ':' or ']'");
  399. break;
  400. case 4:
  401. if(match(TK("]"))) goto __SUBSCR_END;
  402. EXPR_TUPLE();
  403. slice->step = ctx()->s_expr.popx();
  404. state=5;
  405. break;
  406. case 5: consume(TK("]")); goto __SUBSCR_END;
  407. }
  408. }while(true);
  409. __SUBSCR_END:
  410. if(is_slice){
  411. e->b = std::move(slice);
  412. }else{
  413. if(state != 1) FATAL_ERROR();
  414. e->b = std::move(slice->start);
  415. }
  416. ctx()->s_expr.push(std::move(e));
  417. }
  418. void Compiler::exprLiteral0() {
  419. ctx()->s_expr.push(make_expr<Literal0Expr>(prev().type));
  420. }
  421. void Compiler::compile_block_body() {
  422. consume(TK(":"));
  423. if(curr().type!=TK("@eol") && curr().type!=TK("@eof")){
  424. compile_stmt(); // inline block
  425. return;
  426. }
  427. if(!match_newlines(mode()==REPL_MODE)){
  428. SyntaxError("expected a new line after ':'");
  429. }
  430. consume(TK("@indent"));
  431. while (curr().type != TK("@dedent")) {
  432. match_newlines();
  433. compile_stmt();
  434. match_newlines();
  435. }
  436. consume(TK("@dedent"));
  437. }
  438. Str Compiler::_compile_import() {
  439. if(name_scope() != NAME_GLOBAL) SyntaxError("import statement should be used in global scope");
  440. Opcode op = OP_IMPORT_NAME;
  441. if(match(TK("."))) op = OP_IMPORT_NAME_REL;
  442. consume(TK("@id"));
  443. Str name = prev().str();
  444. ctx()->emit(op, StrName(name).index, prev().line);
  445. return name;
  446. }
  447. // import a as b
  448. void Compiler::compile_normal_import() {
  449. do {
  450. Str name = _compile_import();
  451. if (match(TK("as"))) {
  452. consume(TK("@id"));
  453. name = prev().str();
  454. }
  455. ctx()->emit(OP_STORE_GLOBAL, StrName(name).index, prev().line);
  456. } while (match(TK(",")));
  457. consume_end_stmt();
  458. }
  459. // from a import b as c, d as e
  460. void Compiler::compile_from_import() {
  461. _compile_import();
  462. consume(TK("import"));
  463. if (match(TK("*"))) {
  464. ctx()->emit(OP_IMPORT_STAR, BC_NOARG, prev().line);
  465. consume_end_stmt();
  466. return;
  467. }
  468. do {
  469. ctx()->emit(OP_DUP_TOP, BC_NOARG, BC_KEEPLINE);
  470. consume(TK("@id"));
  471. Str name = prev().str();
  472. ctx()->emit(OP_LOAD_ATTR, StrName(name).index, prev().line);
  473. if (match(TK("as"))) {
  474. consume(TK("@id"));
  475. name = prev().str();
  476. }
  477. ctx()->emit(OP_STORE_GLOBAL, StrName(name).index, prev().line);
  478. } while (match(TK(",")));
  479. ctx()->emit(OP_POP_TOP, BC_NOARG, BC_KEEPLINE);
  480. consume_end_stmt();
  481. }
  482. bool Compiler::is_expression(){
  483. PrattCallback prefix = rules[curr().type].prefix;
  484. return prefix != nullptr;
  485. }
  486. void Compiler::parse_expression(int precedence, bool push_stack) {
  487. PrattCallback prefix = rules[curr().type].prefix;
  488. if (prefix == nullptr) SyntaxError(Str("expected an expression, got ") + TK_STR(curr().type));
  489. advance();
  490. (this->*prefix)();
  491. while (rules[curr().type].precedence >= precedence) {
  492. TokenIndex op = curr().type;
  493. advance();
  494. PrattCallback infix = rules[op].infix;
  495. PK_ASSERT(infix != nullptr);
  496. (this->*infix)();
  497. }
  498. if(!push_stack) ctx()->emit_expr();
  499. }
  500. void Compiler::compile_if_stmt() {
  501. EXPR(false); // condition
  502. int patch = ctx()->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, prev().line);
  503. compile_block_body();
  504. if (match(TK("elif"))) {
  505. int exit_patch = ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, prev().line);
  506. ctx()->patch_jump(patch);
  507. compile_if_stmt();
  508. ctx()->patch_jump(exit_patch);
  509. } else if (match(TK("else"))) {
  510. int exit_patch = ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, prev().line);
  511. ctx()->patch_jump(patch);
  512. compile_block_body();
  513. ctx()->patch_jump(exit_patch);
  514. } else {
  515. ctx()->patch_jump(patch);
  516. }
  517. }
  518. void Compiler::compile_while_loop() {
  519. ctx()->enter_block(WHILE_LOOP);
  520. EXPR(false); // condition
  521. int patch = ctx()->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, prev().line);
  522. compile_block_body();
  523. ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, BC_KEEPLINE);
  524. ctx()->patch_jump(patch);
  525. ctx()->exit_block();
  526. }
  527. void Compiler::compile_for_loop() {
  528. Expr_ vars = EXPR_VARS();
  529. consume(TK("in"));
  530. EXPR_TUPLE(false);
  531. ctx()->emit(OP_GET_ITER, BC_NOARG, BC_KEEPLINE);
  532. ctx()->enter_block(FOR_LOOP);
  533. ctx()->emit(OP_FOR_ITER, BC_NOARG, BC_KEEPLINE);
  534. bool ok = vars->emit_store(ctx());
  535. if(!ok) SyntaxError(); // this error occurs in `vars` instead of this line, but...nevermind
  536. compile_block_body();
  537. ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, BC_KEEPLINE);
  538. ctx()->exit_block();
  539. }
  540. void Compiler::compile_try_except() {
  541. ctx()->enter_block(TRY_EXCEPT);
  542. compile_block_body();
  543. std::vector<int> patches = {
  544. ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, BC_KEEPLINE)
  545. };
  546. ctx()->exit_block();
  547. do {
  548. consume(TK("except"));
  549. if(match(TK("@id"))){
  550. ctx()->emit(OP_EXCEPTION_MATCH, StrName(prev().str()).index, prev().line);
  551. }else{
  552. ctx()->emit(OP_LOAD_TRUE, BC_NOARG, BC_KEEPLINE);
  553. }
  554. int patch = ctx()->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, BC_KEEPLINE);
  555. // pop the exception on match
  556. ctx()->emit(OP_POP_EXCEPTION, BC_NOARG, BC_KEEPLINE);
  557. compile_block_body();
  558. patches.push_back(ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, BC_KEEPLINE));
  559. ctx()->patch_jump(patch);
  560. }while(curr().type == TK("except"));
  561. // no match, re-raise
  562. ctx()->emit(OP_RE_RAISE, BC_NOARG, BC_KEEPLINE);
  563. for (int patch : patches) ctx()->patch_jump(patch);
  564. }
  565. void Compiler::compile_decorated(){
  566. std::vector<Expr_> decorators;
  567. do{
  568. EXPR();
  569. decorators.push_back(ctx()->s_expr.popx());
  570. if(!match_newlines_repl()) SyntaxError();
  571. }while(match(TK("@")));
  572. consume(TK("def"));
  573. compile_function(decorators);
  574. }
  575. bool Compiler::try_compile_assignment(){
  576. switch (curr().type) {
  577. case TK("+="): case TK("-="): case TK("*="): case TK("/="): case TK("//="): case TK("%="):
  578. case TK("<<="): case TK(">>="): case TK("&="): case TK("|="): case TK("^="): {
  579. Expr* lhs_p = ctx()->s_expr.top().get();
  580. if(lhs_p->is_starred()) SyntaxError();
  581. if(ctx()->is_compiling_class) SyntaxError("can't use inplace operator in class definition");
  582. advance();
  583. auto e = make_expr<BinaryExpr>();
  584. e->op = prev().type - 1; // -1 to remove =
  585. e->lhs = ctx()->s_expr.popx();
  586. EXPR_TUPLE();
  587. e->rhs = ctx()->s_expr.popx();
  588. if(e->is_starred()) SyntaxError();
  589. e->emit(ctx());
  590. bool ok = lhs_p->emit_store(ctx());
  591. if(!ok) SyntaxError();
  592. } return true;
  593. case TK("="): {
  594. int n = 0;
  595. while(match(TK("="))){
  596. EXPR_TUPLE();
  597. Expr* _tp = ctx()->s_expr.top().get();
  598. if(ctx()->is_compiling_class && _tp->is_tuple()){
  599. SyntaxError("can't use unpack tuple in class definition");
  600. }
  601. n += 1;
  602. }
  603. if(ctx()->is_compiling_class && n>1){
  604. SyntaxError("can't assign to multiple targets in class definition");
  605. }
  606. // stack size is n+1
  607. Expr_ val = ctx()->s_expr.popx();
  608. val->emit(ctx());
  609. for(int j=1; j<n; j++) ctx()->emit(OP_DUP_TOP, BC_NOARG, BC_KEEPLINE);
  610. for(int j=0; j<n; j++){
  611. auto e = ctx()->s_expr.popx();
  612. if(e->is_starred()) SyntaxError();
  613. bool ok = e->emit_store(ctx());
  614. if(!ok) SyntaxError();
  615. }
  616. } return true;
  617. default: return false;
  618. }
  619. }
  620. void Compiler::compile_stmt() {
  621. advance();
  622. int kw_line = prev().line; // backup line number
  623. switch(prev().type){
  624. case TK("break"):
  625. if (!ctx()->is_curr_block_loop()) SyntaxError("'break' outside loop");
  626. ctx()->emit(OP_LOOP_BREAK, BC_NOARG, kw_line);
  627. consume_end_stmt();
  628. break;
  629. case TK("continue"):
  630. if (!ctx()->is_curr_block_loop()) SyntaxError("'continue' not properly in loop");
  631. ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, kw_line);
  632. consume_end_stmt();
  633. break;
  634. case TK("yield"):
  635. if (contexts.size() <= 1) SyntaxError("'yield' outside function");
  636. EXPR_TUPLE(false);
  637. // if yield present, mark the function as generator
  638. ctx()->co->is_generator = true;
  639. ctx()->emit(OP_YIELD_VALUE, BC_NOARG, kw_line);
  640. consume_end_stmt();
  641. break;
  642. case TK("yield from"):
  643. if (contexts.size() <= 1) SyntaxError("'yield from' outside function");
  644. EXPR_TUPLE(false);
  645. // if yield from present, mark the function as generator
  646. ctx()->co->is_generator = true;
  647. ctx()->emit(OP_GET_ITER, BC_NOARG, kw_line);
  648. ctx()->enter_block(FOR_LOOP);
  649. ctx()->emit(OP_FOR_ITER, BC_NOARG, BC_KEEPLINE);
  650. ctx()->emit(OP_YIELD_VALUE, BC_NOARG, BC_KEEPLINE);
  651. ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, BC_KEEPLINE);
  652. ctx()->exit_block();
  653. consume_end_stmt();
  654. break;
  655. case TK("return"):
  656. if (contexts.size() <= 1) SyntaxError("'return' outside function");
  657. if(match_end_stmt()){
  658. ctx()->emit(OP_LOAD_NONE, BC_NOARG, kw_line);
  659. }else{
  660. EXPR_TUPLE(false);
  661. consume_end_stmt();
  662. }
  663. ctx()->emit(OP_RETURN_VALUE, BC_NOARG, kw_line);
  664. break;
  665. /*************************************************/
  666. case TK("if"): compile_if_stmt(); break;
  667. case TK("while"): compile_while_loop(); break;
  668. case TK("for"): compile_for_loop(); break;
  669. case TK("import"): compile_normal_import(); break;
  670. case TK("from"): compile_from_import(); break;
  671. case TK("def"): compile_function(); break;
  672. case TK("@"): compile_decorated(); break;
  673. case TK("try"): compile_try_except(); break;
  674. case TK("pass"): consume_end_stmt(); break;
  675. /*************************************************/
  676. case TK("++"):{
  677. consume(TK("@id"));
  678. StrName name(prev().sv());
  679. switch(name_scope()){
  680. case NAME_LOCAL:
  681. ctx()->emit(OP_INC_FAST, ctx()->add_varname(name), prev().line);
  682. break;
  683. case NAME_GLOBAL:
  684. ctx()->emit(OP_INC_GLOBAL, name.index, prev().line);
  685. break;
  686. default: SyntaxError(); break;
  687. }
  688. consume_end_stmt();
  689. break;
  690. }
  691. case TK("--"):{
  692. consume(TK("@id"));
  693. StrName name(prev().sv());
  694. switch(name_scope()){
  695. case NAME_LOCAL:
  696. ctx()->emit(OP_DEC_FAST, ctx()->add_varname(name), prev().line);
  697. break;
  698. case NAME_GLOBAL:
  699. ctx()->emit(OP_DEC_GLOBAL, name.index, prev().line);
  700. break;
  701. default: SyntaxError(); break;
  702. }
  703. consume_end_stmt();
  704. break;
  705. }
  706. case TK("assert"):
  707. EXPR_TUPLE(false);
  708. ctx()->emit(OP_ASSERT, BC_NOARG, kw_line);
  709. consume_end_stmt();
  710. break;
  711. case TK("global"):
  712. do {
  713. consume(TK("@id"));
  714. ctx()->global_names.insert(prev().str());
  715. } while (match(TK(",")));
  716. consume_end_stmt();
  717. break;
  718. case TK("raise"): {
  719. consume(TK("@id"));
  720. int dummy_t = StrName(prev().str()).index;
  721. if(match(TK("(")) && !match(TK(")"))){
  722. EXPR(false); consume(TK(")"));
  723. }else{
  724. ctx()->emit(OP_LOAD_NONE, BC_NOARG, kw_line);
  725. }
  726. ctx()->emit(OP_RAISE, dummy_t, kw_line);
  727. consume_end_stmt();
  728. } break;
  729. case TK("del"): {
  730. EXPR_TUPLE();
  731. Expr_ e = ctx()->s_expr.popx();
  732. bool ok = e->emit_del(ctx());
  733. if(!ok) SyntaxError();
  734. consume_end_stmt();
  735. } break;
  736. case TK("with"): {
  737. EXPR(false);
  738. consume(TK("as"));
  739. consume(TK("@id"));
  740. Expr_ e = make_expr<NameExpr>(prev().str(), name_scope());
  741. bool ok = e->emit_store(ctx());
  742. if(!ok) SyntaxError();
  743. e->emit(ctx());
  744. ctx()->emit(OP_WITH_ENTER, BC_NOARG, prev().line);
  745. compile_block_body();
  746. e->emit(ctx());
  747. ctx()->emit(OP_WITH_EXIT, BC_NOARG, prev().line);
  748. } break;
  749. /*************************************************/
  750. case TK("$label"): {
  751. if(mode()!=EXEC_MODE) SyntaxError("'label' is only available in EXEC_MODE");
  752. consume(TK("@id"));
  753. bool ok = ctx()->add_label(prev().str());
  754. if(!ok) SyntaxError("label " + prev().str().escape() + " already exists");
  755. consume_end_stmt();
  756. } break;
  757. case TK("$goto"):
  758. if(mode()!=EXEC_MODE) SyntaxError("'goto' is only available in EXEC_MODE");
  759. consume(TK("@id"));
  760. ctx()->emit(OP_GOTO, StrName(prev().str()).index, prev().line);
  761. consume_end_stmt();
  762. break;
  763. /*************************************************/
  764. // handle dangling expression or assignment
  765. default: {
  766. advance(-1); // do revert since we have pre-called advance() at the beginning
  767. EXPR_TUPLE();
  768. // eat variable's type hint
  769. if(match(TK(":"))) consume_type_hints();
  770. if(!try_compile_assignment()){
  771. if(!ctx()->s_expr.empty() && ctx()->s_expr.top()->is_starred()){
  772. SyntaxError();
  773. }
  774. ctx()->emit_expr();
  775. if((mode()==CELL_MODE || mode()==REPL_MODE) && name_scope()==NAME_GLOBAL){
  776. ctx()->emit(OP_PRINT_EXPR, BC_NOARG, BC_KEEPLINE);
  777. }else{
  778. ctx()->emit(OP_POP_TOP, BC_NOARG, BC_KEEPLINE);
  779. }
  780. }
  781. consume_end_stmt();
  782. }
  783. }
  784. }
  785. void Compiler::consume_type_hints(){
  786. EXPR();
  787. ctx()->s_expr.pop();
  788. }
  789. void Compiler::compile_class(){
  790. consume(TK("@id"));
  791. int namei = StrName(prev().str()).index;
  792. Expr_ base = nullptr;
  793. if(match(TK("("))){
  794. if(is_expression()){
  795. EXPR();
  796. base = ctx()->s_expr.popx();
  797. }
  798. consume(TK(")"));
  799. }
  800. if(base == nullptr){
  801. ctx()->emit(OP_LOAD_NONE, BC_NOARG, prev().line);
  802. }else {
  803. base->emit(ctx());
  804. }
  805. ctx()->emit(OP_BEGIN_CLASS, namei, BC_KEEPLINE);
  806. ctx()->is_compiling_class = true;
  807. compile_block_body();
  808. ctx()->is_compiling_class = false;
  809. ctx()->emit(OP_END_CLASS, BC_NOARG, BC_KEEPLINE);
  810. }
  811. void Compiler::_compile_f_args(FuncDecl_ decl, bool enable_type_hints){
  812. int state = 0; // 0 for args, 1 for *args, 2 for k=v, 3 for **kwargs
  813. do {
  814. if(state > 3) SyntaxError();
  815. if(state == 3) SyntaxError("**kwargs should be the last argument");
  816. match_newlines();
  817. if(match(TK("*"))){
  818. if(state < 1) state = 1;
  819. else SyntaxError("*args should be placed before **kwargs");
  820. }
  821. else if(match(TK("**"))){
  822. state = 3;
  823. }
  824. consume(TK("@id"));
  825. StrName name = prev().str();
  826. // check duplicate argument name
  827. for(int j: decl->args){
  828. if(decl->code->varnames[j] == name) {
  829. SyntaxError("duplicate argument name");
  830. }
  831. }
  832. for(auto& kv: decl->kwargs){
  833. if(decl->code->varnames[kv.key] == name){
  834. SyntaxError("duplicate argument name");
  835. }
  836. }
  837. if(decl->starred_arg!=-1 && decl->code->varnames[decl->starred_arg] == name){
  838. SyntaxError("duplicate argument name");
  839. }
  840. if(decl->starred_kwarg!=-1 && decl->code->varnames[decl->starred_kwarg] == name){
  841. SyntaxError("duplicate argument name");
  842. }
  843. // eat type hints
  844. if(enable_type_hints && match(TK(":"))) consume_type_hints();
  845. if(state == 0 && curr().type == TK("=")) state = 2;
  846. int index = ctx()->add_varname(name);
  847. switch (state)
  848. {
  849. case 0:
  850. decl->args.push_back(index);
  851. break;
  852. case 1:
  853. decl->starred_arg = index;
  854. state+=1;
  855. break;
  856. case 2: {
  857. consume(TK("="));
  858. PyObject* value = read_literal();
  859. if(value == nullptr){
  860. SyntaxError(Str("default argument must be a literal"));
  861. }
  862. decl->kwargs.push_back(FuncDecl::KwArg{index, value});
  863. } break;
  864. case 3:
  865. decl->starred_kwarg = index;
  866. state+=1;
  867. break;
  868. }
  869. } while (match(TK(",")));
  870. }
  871. void Compiler::compile_function(const std::vector<Expr_>& decorators){
  872. const char* _start = curr().start;
  873. consume(TK("@id"));
  874. Str decl_name = prev().str();
  875. FuncDecl_ decl = push_f_context(decl_name);
  876. consume(TK("("));
  877. if (!match(TK(")"))) {
  878. _compile_f_args(decl, true);
  879. consume(TK(")"));
  880. }
  881. if(match(TK("->"))) consume_type_hints();
  882. const char* _end = curr().start;
  883. decl->signature = Str(_start, _end-_start);
  884. compile_block_body();
  885. pop_context();
  886. PyObject* docstring = nullptr;
  887. if(decl->code->codes.size()>=2 && decl->code->codes[0].op == OP_LOAD_CONST && decl->code->codes[1].op == OP_POP_TOP){
  888. PyObject* c = decl->code->consts[decl->code->codes[0].arg];
  889. if(is_type(c, vm->tp_str)){
  890. decl->code->codes[0].op = OP_NO_OP;
  891. decl->code->codes[1].op = OP_NO_OP;
  892. docstring = c;
  893. }
  894. }
  895. if(docstring != nullptr){
  896. decl->docstring = PK_OBJ_GET(Str, docstring);
  897. }
  898. ctx()->emit(OP_LOAD_FUNCTION, ctx()->add_func_decl(decl), prev().line);
  899. // add decorators
  900. for(auto it=decorators.rbegin(); it!=decorators.rend(); ++it){
  901. (*it)->emit(ctx());
  902. ctx()->emit(OP_ROT_TWO, BC_NOARG, (*it)->line);
  903. ctx()->emit(OP_LOAD_NULL, BC_NOARG, BC_KEEPLINE);
  904. ctx()->emit(OP_ROT_TWO, BC_NOARG, BC_KEEPLINE);
  905. ctx()->emit(OP_CALL, 1, (*it)->line);
  906. }
  907. if(!ctx()->is_compiling_class){
  908. auto e = make_expr<NameExpr>(decl_name, name_scope());
  909. e->emit_store(ctx());
  910. }else{
  911. int index = StrName(decl_name).index;
  912. ctx()->emit(OP_STORE_CLASS_ATTR, index, prev().line);
  913. }
  914. }
  915. PyObject* Compiler::to_object(const TokenValue& value){
  916. PyObject* obj = nullptr;
  917. if(std::holds_alternative<i64>(value)){
  918. obj = VAR(std::get<i64>(value));
  919. }
  920. if(std::holds_alternative<f64>(value)){
  921. obj = VAR(std::get<f64>(value));
  922. }
  923. if(std::holds_alternative<Str>(value)){
  924. obj = VAR(std::get<Str>(value));
  925. }
  926. if(obj == nullptr) FATAL_ERROR();
  927. return obj;
  928. }
  929. PyObject* Compiler::read_literal(){
  930. advance();
  931. switch(prev().type){
  932. case TK("-"): {
  933. consume(TK("@num"));
  934. PyObject* val = to_object(prev().value);
  935. return vm->py_negate(val);
  936. }
  937. case TK("@num"): return to_object(prev().value);
  938. case TK("@str"): return to_object(prev().value);
  939. case TK("True"): return VAR(true);
  940. case TK("False"): return VAR(false);
  941. case TK("None"): return vm->None;
  942. case TK("..."): return vm->Ellipsis;
  943. default: break;
  944. }
  945. return nullptr;
  946. }
  947. Compiler::Compiler(VM* vm, const Str& source, const Str& filename, CompileMode mode, bool unknown_global_scope){
  948. this->vm = vm;
  949. this->used = false;
  950. this->unknown_global_scope = unknown_global_scope;
  951. this->lexer = std::make_unique<Lexer>(
  952. make_sp<SourceData>(source, filename, mode)
  953. );
  954. init_pratt_rules();
  955. }
  956. CodeObject_ Compiler::compile(){
  957. if(used) FATAL_ERROR();
  958. used = true;
  959. tokens = lexer->run();
  960. // if(lexer->src->filename == "<stdin>"){
  961. // for(auto& t: tokens) std::cout << t.info() << std::endl;
  962. // }
  963. CodeObject_ code = push_global_context();
  964. advance(); // skip @sof, so prev() is always valid
  965. match_newlines(); // skip possible leading '\n'
  966. if(mode()==EVAL_MODE) {
  967. EXPR_TUPLE(false);
  968. consume(TK("@eof"));
  969. ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  970. pop_context();
  971. return code;
  972. }else if(mode()==JSON_MODE){
  973. EXPR();
  974. Expr_ e = ctx()->s_expr.popx();
  975. if(!e->is_json_object()) SyntaxError("expect a JSON object, literal or array");
  976. consume(TK("@eof"));
  977. e->emit(ctx());
  978. ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  979. pop_context();
  980. return code;
  981. }
  982. while (!match(TK("@eof"))) {
  983. if (match(TK("class"))) {
  984. compile_class();
  985. } else {
  986. compile_stmt();
  987. }
  988. match_newlines();
  989. }
  990. pop_context();
  991. return code;
  992. }
  993. } // namespace pkpy