compiler.cpp 40 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055
  1. #include "pocketpy/compiler.h"
  2. namespace pkpy{
  3. NameScope Compiler::name_scope() const {
  4. auto s = contexts.size()>1 ? NAME_LOCAL : NAME_GLOBAL;
  5. if(unknown_global_scope && s == NAME_GLOBAL) s = NAME_GLOBAL_UNKNOWN;
  6. return s;
  7. }
  8. CodeObject_ Compiler::push_global_context(){
  9. CodeObject_ co = make_sp<CodeObject>(lexer->src, lexer->src->filename);
  10. contexts.push(CodeEmitContext(vm, co, contexts.size()));
  11. return co;
  12. }
  13. FuncDecl_ Compiler::push_f_context(Str name){
  14. FuncDecl_ decl = make_sp<FuncDecl>();
  15. decl->code = make_sp<CodeObject>(lexer->src, name);
  16. decl->nested = name_scope() == NAME_LOCAL;
  17. contexts.push(CodeEmitContext(vm, decl->code, contexts.size()));
  18. return decl;
  19. }
  20. void Compiler::pop_context(){
  21. if(!ctx()->s_expr.empty()){
  22. throw std::runtime_error("!ctx()->s_expr.empty()\n" + ctx()->_log_s_expr());
  23. }
  24. // add a `return None` in the end as a guard
  25. // previously, we only do this if the last opcode is not a return
  26. // however, this is buggy...since there may be a jump to the end (out of bound) even if the last opcode is a return
  27. ctx()->emit(OP_LOAD_NONE, BC_NOARG, BC_KEEPLINE);
  28. ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  29. // ctx()->co->optimize(vm);
  30. if(ctx()->co->varnames.size() > PK_MAX_CO_VARNAMES){
  31. SyntaxError("maximum number of local variables exceeded");
  32. }
  33. contexts.pop();
  34. }
  35. void Compiler::init_pratt_rules(){
  36. if(rules[TK(".")].precedence != PREC_NONE) return;
  37. // http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
  38. #define METHOD(name) &Compiler::name
  39. #define NO_INFIX nullptr, PREC_NONE
  40. for(TokenIndex i=0; i<kTokenCount; i++) rules[i] = { nullptr, NO_INFIX };
  41. rules[TK(".")] = { nullptr, METHOD(exprAttrib), PREC_ATTRIB };
  42. rules[TK("(")] = { METHOD(exprGroup), METHOD(exprCall), PREC_CALL };
  43. rules[TK("[")] = { METHOD(exprList), METHOD(exprSubscr), PREC_SUBSCRIPT };
  44. rules[TK("{")] = { METHOD(exprMap), NO_INFIX };
  45. rules[TK("%")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  46. rules[TK("+")] = { nullptr, METHOD(exprBinaryOp), PREC_TERM };
  47. rules[TK("-")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_TERM };
  48. rules[TK("*")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_FACTOR };
  49. rules[TK("~")] = { METHOD(exprUnaryOp), nullptr, PREC_UNARY };
  50. rules[TK("/")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  51. rules[TK("//")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  52. rules[TK("**")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_EXPONENT };
  53. rules[TK(">")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  54. rules[TK("<")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  55. rules[TK("==")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  56. rules[TK("!=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  57. rules[TK(">=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  58. rules[TK("<=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  59. rules[TK("in")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  60. rules[TK("is")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  61. rules[TK("<<")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  62. rules[TK(">>")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  63. rules[TK("&")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_AND };
  64. rules[TK("|")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_OR };
  65. rules[TK("^")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_XOR };
  66. rules[TK("@")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  67. rules[TK("if")] = { nullptr, METHOD(exprTernary), PREC_TERNARY };
  68. rules[TK(",")] = { nullptr, METHOD(exprTuple), PREC_TUPLE };
  69. rules[TK("not in")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  70. rules[TK("is not")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  71. rules[TK("and") ] = { nullptr, METHOD(exprAnd), PREC_LOGICAL_AND };
  72. rules[TK("or")] = { nullptr, METHOD(exprOr), PREC_LOGICAL_OR };
  73. rules[TK("not")] = { METHOD(exprNot), nullptr, PREC_LOGICAL_NOT };
  74. rules[TK("True")] = { METHOD(exprLiteral0), NO_INFIX };
  75. rules[TK("False")] = { METHOD(exprLiteral0), NO_INFIX };
  76. rules[TK("None")] = { METHOD(exprLiteral0), NO_INFIX };
  77. rules[TK("...")] = { METHOD(exprLiteral0), NO_INFIX };
  78. rules[TK("lambda")] = { METHOD(exprLambda), NO_INFIX };
  79. rules[TK("@id")] = { METHOD(exprName), NO_INFIX };
  80. rules[TK("@num")] = { METHOD(exprLiteral), NO_INFIX };
  81. rules[TK("@str")] = { METHOD(exprLiteral), NO_INFIX };
  82. rules[TK("@fstr")] = { METHOD(exprFString), NO_INFIX };
  83. rules[TK("@long")] = { METHOD(exprLong), NO_INFIX };
  84. #undef METHOD
  85. #undef NO_INFIX
  86. }
  87. bool Compiler::match(TokenIndex expected) {
  88. if (curr().type != expected) return false;
  89. advance();
  90. return true;
  91. }
  92. void Compiler::consume(TokenIndex expected) {
  93. if (!match(expected)){
  94. SyntaxError(
  95. fmt("expected '", TK_STR(expected), "', got '", TK_STR(curr().type), "'")
  96. );
  97. }
  98. }
  99. bool Compiler::match_newlines_repl(){
  100. return match_newlines(mode()==REPL_MODE);
  101. }
  102. bool Compiler::match_newlines(bool repl_throw) {
  103. bool consumed = false;
  104. if (curr().type == TK("@eol")) {
  105. while (curr().type == TK("@eol")) advance();
  106. consumed = true;
  107. }
  108. if (repl_throw && curr().type == TK("@eof")){
  109. throw NeedMoreLines(ctx()->is_compiling_class);
  110. }
  111. return consumed;
  112. }
  113. bool Compiler::match_end_stmt() {
  114. if (match(TK(";"))) { match_newlines(); return true; }
  115. if (match_newlines() || curr().type == TK("@eof")) return true;
  116. if (curr().type == TK("@dedent")) return true;
  117. return false;
  118. }
  119. void Compiler::consume_end_stmt() {
  120. if (!match_end_stmt()) SyntaxError("expected statement end");
  121. }
  122. void Compiler::EXPR(bool push_stack) {
  123. parse_expression(PREC_TUPLE+1, push_stack);
  124. }
  125. void Compiler::EXPR_TUPLE(bool push_stack) {
  126. parse_expression(PREC_TUPLE, push_stack);
  127. }
  128. // special case for `for loop` and `comp`
  129. Expr_ Compiler::EXPR_VARS(){
  130. std::vector<Expr_> items;
  131. do {
  132. consume(TK("@id"));
  133. items.push_back(make_expr<NameExpr>(prev().str(), name_scope()));
  134. } while(match(TK(",")));
  135. if(items.size()==1) return std::move(items[0]);
  136. return make_expr<TupleExpr>(std::move(items));
  137. }
  138. void Compiler::exprLiteral(){
  139. ctx()->s_expr.push(make_expr<LiteralExpr>(prev().value));
  140. }
  141. void Compiler::exprLong(){
  142. ctx()->s_expr.push(make_expr<LongExpr>(prev().str()));
  143. }
  144. void Compiler::exprFString(){
  145. ctx()->s_expr.push(make_expr<FStringExpr>(std::get<Str>(prev().value)));
  146. }
  147. void Compiler::exprLambda(){
  148. FuncDecl_ decl = push_f_context("<lambda>");
  149. auto e = make_expr<LambdaExpr>(decl);
  150. if(!match(TK(":"))){
  151. _compile_f_args(e->decl, false);
  152. consume(TK(":"));
  153. }
  154. // https://github.com/blueloveTH/pocketpy/issues/37
  155. parse_expression(PREC_LAMBDA + 1, false);
  156. ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  157. pop_context();
  158. ctx()->s_expr.push(std::move(e));
  159. }
  160. void Compiler::exprTuple(){
  161. std::vector<Expr_> items;
  162. items.push_back(ctx()->s_expr.popx());
  163. do {
  164. if(curr().brackets_level) match_newlines_repl();
  165. if(!is_expression()) break;
  166. EXPR();
  167. items.push_back(ctx()->s_expr.popx());
  168. if(curr().brackets_level) match_newlines_repl();
  169. } while(match(TK(",")));
  170. ctx()->s_expr.push(make_expr<TupleExpr>(
  171. std::move(items)
  172. ));
  173. }
  174. void Compiler::exprOr(){
  175. auto e = make_expr<OrExpr>();
  176. e->lhs = ctx()->s_expr.popx();
  177. parse_expression(PREC_LOGICAL_OR + 1);
  178. e->rhs = ctx()->s_expr.popx();
  179. ctx()->s_expr.push(std::move(e));
  180. }
  181. void Compiler::exprAnd(){
  182. auto e = make_expr<AndExpr>();
  183. e->lhs = ctx()->s_expr.popx();
  184. parse_expression(PREC_LOGICAL_AND + 1);
  185. e->rhs = ctx()->s_expr.popx();
  186. ctx()->s_expr.push(std::move(e));
  187. }
  188. void Compiler::exprTernary(){
  189. auto e = make_expr<TernaryExpr>();
  190. e->true_expr = ctx()->s_expr.popx();
  191. // cond
  192. parse_expression(PREC_TERNARY + 1);
  193. e->cond = ctx()->s_expr.popx();
  194. consume(TK("else"));
  195. // if false
  196. parse_expression(PREC_TERNARY + 1);
  197. e->false_expr = ctx()->s_expr.popx();
  198. ctx()->s_expr.push(std::move(e));
  199. }
  200. void Compiler::exprBinaryOp(){
  201. auto e = make_expr<BinaryExpr>();
  202. e->op = prev().type;
  203. e->lhs = ctx()->s_expr.popx();
  204. parse_expression(rules[e->op].precedence + 1);
  205. e->rhs = ctx()->s_expr.popx();
  206. ctx()->s_expr.push(std::move(e));
  207. }
  208. void Compiler::exprNot() {
  209. parse_expression(PREC_LOGICAL_NOT + 1);
  210. ctx()->s_expr.push(make_expr<NotExpr>(ctx()->s_expr.popx()));
  211. }
  212. void Compiler::exprUnaryOp(){
  213. TokenIndex op = prev().type;
  214. parse_expression(PREC_UNARY + 1);
  215. switch(op){
  216. case TK("-"):
  217. ctx()->s_expr.push(make_expr<NegatedExpr>(ctx()->s_expr.popx()));
  218. break;
  219. case TK("~"):
  220. ctx()->s_expr.push(make_expr<InvertExpr>(ctx()->s_expr.popx()));
  221. break;
  222. case TK("*"):
  223. ctx()->s_expr.push(make_expr<StarredExpr>(1, ctx()->s_expr.popx()));
  224. break;
  225. case TK("**"):
  226. ctx()->s_expr.push(make_expr<StarredExpr>(2, ctx()->s_expr.popx()));
  227. break;
  228. default: FATAL_ERROR();
  229. }
  230. }
  231. void Compiler::exprGroup(){
  232. match_newlines_repl();
  233. EXPR_TUPLE(); // () is just for change precedence
  234. match_newlines_repl();
  235. consume(TK(")"));
  236. if(ctx()->s_expr.top()->is_tuple()) return;
  237. Expr_ g = make_expr<GroupedExpr>(ctx()->s_expr.popx());
  238. ctx()->s_expr.push(std::move(g));
  239. }
  240. void Compiler::exprList() {
  241. int line = prev().line;
  242. std::vector<Expr_> items;
  243. do {
  244. match_newlines_repl();
  245. if (curr().type == TK("]")) break;
  246. EXPR();
  247. items.push_back(ctx()->s_expr.popx());
  248. match_newlines_repl();
  249. if(items.size()==1 && match(TK("for"))){
  250. _consume_comp<ListCompExpr>(std::move(items[0]));
  251. consume(TK("]"));
  252. return;
  253. }
  254. match_newlines_repl();
  255. } while (match(TK(",")));
  256. consume(TK("]"));
  257. auto e = make_expr<ListExpr>(std::move(items));
  258. e->line = line; // override line
  259. ctx()->s_expr.push(std::move(e));
  260. }
  261. void Compiler::exprMap() {
  262. bool parsing_dict = false; // {...} may be dict or set
  263. std::vector<Expr_> items;
  264. do {
  265. match_newlines_repl();
  266. if (curr().type == TK("}")) break;
  267. EXPR();
  268. int star_level = ctx()->s_expr.top()->star_level();
  269. if(star_level==2 || curr().type == TK(":")){
  270. parsing_dict = true;
  271. }
  272. if(parsing_dict){
  273. auto dict_item = make_expr<DictItemExpr>();
  274. if(star_level == 2){
  275. dict_item->key = nullptr;
  276. dict_item->value = ctx()->s_expr.popx();
  277. }else{
  278. consume(TK(":"));
  279. EXPR();
  280. dict_item->key = ctx()->s_expr.popx();
  281. dict_item->value = ctx()->s_expr.popx();
  282. }
  283. items.push_back(std::move(dict_item));
  284. }else{
  285. items.push_back(ctx()->s_expr.popx());
  286. }
  287. match_newlines_repl();
  288. if(items.size()==1 && match(TK("for"))){
  289. if(parsing_dict) _consume_comp<DictCompExpr>(std::move(items[0]));
  290. else _consume_comp<SetCompExpr>(std::move(items[0]));
  291. consume(TK("}"));
  292. return;
  293. }
  294. match_newlines_repl();
  295. } while (match(TK(",")));
  296. consume(TK("}"));
  297. if(items.size()==0 || parsing_dict){
  298. auto e = make_expr<DictExpr>(std::move(items));
  299. ctx()->s_expr.push(std::move(e));
  300. }else{
  301. auto e = make_expr<SetExpr>(std::move(items));
  302. ctx()->s_expr.push(std::move(e));
  303. }
  304. }
  305. void Compiler::exprCall() {
  306. auto e = make_expr<CallExpr>();
  307. e->callable = ctx()->s_expr.popx();
  308. do {
  309. match_newlines_repl();
  310. if (curr().type==TK(")")) break;
  311. if(curr().type==TK("@id") && next().type==TK("=")) {
  312. consume(TK("@id"));
  313. Str key = prev().str();
  314. consume(TK("="));
  315. EXPR();
  316. e->kwargs.push_back({key, ctx()->s_expr.popx()});
  317. } else{
  318. EXPR();
  319. if(ctx()->s_expr.top()->star_level() == 2){
  320. // **kwargs
  321. e->kwargs.push_back({"**", ctx()->s_expr.popx()});
  322. }else{
  323. // positional argument
  324. if(!e->kwargs.empty()) SyntaxError("positional argument follows keyword argument");
  325. e->args.push_back(ctx()->s_expr.popx());
  326. }
  327. }
  328. match_newlines_repl();
  329. } while (match(TK(",")));
  330. consume(TK(")"));
  331. if(e->args.size() > 32767) SyntaxError("too many positional arguments");
  332. if(e->kwargs.size() > 32767) SyntaxError("too many keyword arguments");
  333. ctx()->s_expr.push(std::move(e));
  334. }
  335. void Compiler::exprName(){
  336. Str name = prev().str();
  337. NameScope scope = name_scope();
  338. if(ctx()->global_names.count(name)){
  339. scope = NAME_GLOBAL;
  340. }
  341. ctx()->s_expr.push(make_expr<NameExpr>(name, scope));
  342. }
  343. void Compiler::exprAttrib() {
  344. consume(TK("@id"));
  345. ctx()->s_expr.push(
  346. make_expr<AttribExpr>(ctx()->s_expr.popx(), prev().str())
  347. );
  348. }
  349. void Compiler::exprSubscr() {
  350. auto e = make_expr<SubscrExpr>();
  351. e->a = ctx()->s_expr.popx();
  352. auto slice = make_expr<SliceExpr>();
  353. bool is_slice = false;
  354. // a[<0> <state:1> : state<3> : state<5>]
  355. int state = 0;
  356. do{
  357. switch(state){
  358. case 0:
  359. if(match(TK(":"))){
  360. is_slice=true;
  361. state=2;
  362. break;
  363. }
  364. if(match(TK("]"))) SyntaxError();
  365. EXPR_TUPLE();
  366. slice->start = ctx()->s_expr.popx();
  367. state=1;
  368. break;
  369. case 1:
  370. if(match(TK(":"))){
  371. is_slice=true;
  372. state=2;
  373. break;
  374. }
  375. if(match(TK("]"))) goto __SUBSCR_END;
  376. SyntaxError("expected ':' or ']'");
  377. break;
  378. case 2:
  379. if(match(TK(":"))){
  380. state=4;
  381. break;
  382. }
  383. if(match(TK("]"))) goto __SUBSCR_END;
  384. EXPR_TUPLE();
  385. slice->stop = ctx()->s_expr.popx();
  386. state=3;
  387. break;
  388. case 3:
  389. if(match(TK(":"))){
  390. state=4;
  391. break;
  392. }
  393. if(match(TK("]"))) goto __SUBSCR_END;
  394. SyntaxError("expected ':' or ']'");
  395. break;
  396. case 4:
  397. if(match(TK("]"))) goto __SUBSCR_END;
  398. EXPR_TUPLE();
  399. slice->step = ctx()->s_expr.popx();
  400. state=5;
  401. break;
  402. case 5: consume(TK("]")); goto __SUBSCR_END;
  403. }
  404. }while(true);
  405. __SUBSCR_END:
  406. if(is_slice){
  407. e->b = std::move(slice);
  408. }else{
  409. if(state != 1) FATAL_ERROR();
  410. e->b = std::move(slice->start);
  411. }
  412. ctx()->s_expr.push(std::move(e));
  413. }
  414. void Compiler::exprLiteral0() {
  415. ctx()->s_expr.push(make_expr<Literal0Expr>(prev().type));
  416. }
  417. void Compiler::compile_block_body() {
  418. consume(TK(":"));
  419. if(curr().type!=TK("@eol") && curr().type!=TK("@eof")){
  420. compile_stmt(); // inline block
  421. return;
  422. }
  423. if(!match_newlines(mode()==REPL_MODE)){
  424. SyntaxError("expected a new line after ':'");
  425. }
  426. consume(TK("@indent"));
  427. while (curr().type != TK("@dedent")) {
  428. match_newlines();
  429. compile_stmt();
  430. match_newlines();
  431. }
  432. consume(TK("@dedent"));
  433. }
  434. Str Compiler::_compile_import() {
  435. if(name_scope() != NAME_GLOBAL) SyntaxError("import statement should be used in global scope");
  436. Opcode op = OP_IMPORT_NAME;
  437. if(match(TK("."))) op = OP_IMPORT_NAME_REL;
  438. consume(TK("@id"));
  439. Str name = prev().str();
  440. ctx()->emit(op, StrName(name).index, prev().line);
  441. return name;
  442. }
  443. // import a as b
  444. void Compiler::compile_normal_import() {
  445. do {
  446. Str name = _compile_import();
  447. if (match(TK("as"))) {
  448. consume(TK("@id"));
  449. name = prev().str();
  450. }
  451. ctx()->emit(OP_STORE_GLOBAL, StrName(name).index, prev().line);
  452. } while (match(TK(",")));
  453. consume_end_stmt();
  454. }
  455. // from a import b as c, d as e
  456. void Compiler::compile_from_import() {
  457. _compile_import();
  458. consume(TK("import"));
  459. if (match(TK("*"))) {
  460. ctx()->emit(OP_IMPORT_STAR, BC_NOARG, prev().line);
  461. consume_end_stmt();
  462. return;
  463. }
  464. do {
  465. ctx()->emit(OP_DUP_TOP, BC_NOARG, BC_KEEPLINE);
  466. consume(TK("@id"));
  467. Str name = prev().str();
  468. ctx()->emit(OP_LOAD_ATTR, StrName(name).index, prev().line);
  469. if (match(TK("as"))) {
  470. consume(TK("@id"));
  471. name = prev().str();
  472. }
  473. ctx()->emit(OP_STORE_GLOBAL, StrName(name).index, prev().line);
  474. } while (match(TK(",")));
  475. ctx()->emit(OP_POP_TOP, BC_NOARG, BC_KEEPLINE);
  476. consume_end_stmt();
  477. }
  478. bool Compiler::is_expression(){
  479. PrattCallback prefix = rules[curr().type].prefix;
  480. return prefix != nullptr;
  481. }
  482. void Compiler::parse_expression(int precedence, bool push_stack) {
  483. PrattCallback prefix = rules[curr().type].prefix;
  484. if (prefix == nullptr) SyntaxError(Str("expected an expression, got ") + TK_STR(curr().type));
  485. advance();
  486. (this->*prefix)();
  487. while (rules[curr().type].precedence >= precedence) {
  488. TokenIndex op = curr().type;
  489. advance();
  490. PrattCallback infix = rules[op].infix;
  491. PK_ASSERT(infix != nullptr);
  492. (this->*infix)();
  493. }
  494. if(!push_stack) ctx()->emit_expr();
  495. }
  496. void Compiler::compile_if_stmt() {
  497. EXPR(false); // condition
  498. int patch = ctx()->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, prev().line);
  499. compile_block_body();
  500. if (match(TK("elif"))) {
  501. int exit_patch = ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, prev().line);
  502. ctx()->patch_jump(patch);
  503. compile_if_stmt();
  504. ctx()->patch_jump(exit_patch);
  505. } else if (match(TK("else"))) {
  506. int exit_patch = ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, prev().line);
  507. ctx()->patch_jump(patch);
  508. compile_block_body();
  509. ctx()->patch_jump(exit_patch);
  510. } else {
  511. ctx()->patch_jump(patch);
  512. }
  513. }
  514. void Compiler::compile_while_loop() {
  515. ctx()->enter_block(WHILE_LOOP);
  516. EXPR(false); // condition
  517. int patch = ctx()->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, prev().line);
  518. compile_block_body();
  519. ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, BC_KEEPLINE);
  520. ctx()->patch_jump(patch);
  521. ctx()->exit_block();
  522. }
  523. void Compiler::compile_for_loop() {
  524. Expr_ vars = EXPR_VARS();
  525. consume(TK("in"));
  526. EXPR_TUPLE(false);
  527. ctx()->emit(OP_GET_ITER, BC_NOARG, BC_KEEPLINE);
  528. ctx()->enter_block(FOR_LOOP);
  529. ctx()->emit(OP_FOR_ITER, BC_NOARG, BC_KEEPLINE);
  530. bool ok = vars->emit_store(ctx());
  531. if(!ok) SyntaxError(); // this error occurs in `vars` instead of this line, but...nevermind
  532. compile_block_body();
  533. ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, BC_KEEPLINE);
  534. ctx()->exit_block();
  535. }
  536. void Compiler::compile_try_except() {
  537. ctx()->enter_block(TRY_EXCEPT);
  538. compile_block_body();
  539. std::vector<int> patches = {
  540. ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, BC_KEEPLINE)
  541. };
  542. ctx()->exit_block();
  543. do {
  544. consume(TK("except"));
  545. if(match(TK("@id"))){
  546. ctx()->emit(OP_EXCEPTION_MATCH, StrName(prev().str()).index, prev().line);
  547. }else{
  548. ctx()->emit(OP_LOAD_TRUE, BC_NOARG, BC_KEEPLINE);
  549. }
  550. int patch = ctx()->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, BC_KEEPLINE);
  551. // pop the exception on match
  552. ctx()->emit(OP_POP_EXCEPTION, BC_NOARG, BC_KEEPLINE);
  553. compile_block_body();
  554. patches.push_back(ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, BC_KEEPLINE));
  555. ctx()->patch_jump(patch);
  556. }while(curr().type == TK("except"));
  557. // no match, re-raise
  558. ctx()->emit(OP_RE_RAISE, BC_NOARG, BC_KEEPLINE);
  559. for (int patch : patches) ctx()->patch_jump(patch);
  560. }
  561. void Compiler::compile_decorated(){
  562. std::vector<Expr_> decorators;
  563. do{
  564. EXPR();
  565. decorators.push_back(ctx()->s_expr.popx());
  566. if(!match_newlines_repl()) SyntaxError();
  567. }while(match(TK("@")));
  568. consume(TK("def"));
  569. compile_function(decorators);
  570. }
  571. bool Compiler::try_compile_assignment(){
  572. switch (curr().type) {
  573. case TK("+="): case TK("-="): case TK("*="): case TK("/="): case TK("//="): case TK("%="):
  574. case TK("<<="): case TK(">>="): case TK("&="): case TK("|="): case TK("^="): {
  575. Expr* lhs_p = ctx()->s_expr.top().get();
  576. if(lhs_p->is_starred()) SyntaxError();
  577. if(ctx()->is_compiling_class) SyntaxError("can't use inplace operator in class definition");
  578. advance();
  579. auto e = make_expr<BinaryExpr>();
  580. e->op = prev().type - 1; // -1 to remove =
  581. e->lhs = ctx()->s_expr.popx();
  582. EXPR_TUPLE();
  583. e->rhs = ctx()->s_expr.popx();
  584. if(e->is_starred()) SyntaxError();
  585. e->emit(ctx());
  586. bool ok = lhs_p->emit_store(ctx());
  587. if(!ok) SyntaxError();
  588. } return true;
  589. case TK("="): {
  590. int n = 0;
  591. while(match(TK("="))){
  592. EXPR_TUPLE();
  593. Expr* _tp = ctx()->s_expr.top().get();
  594. if(ctx()->is_compiling_class && _tp->is_tuple()){
  595. SyntaxError("can't use unpack tuple in class definition");
  596. }
  597. n += 1;
  598. }
  599. if(ctx()->is_compiling_class && n>1){
  600. SyntaxError("can't assign to multiple targets in class definition");
  601. }
  602. // stack size is n+1
  603. Expr_ val = ctx()->s_expr.popx();
  604. val->emit(ctx());
  605. for(int j=1; j<n; j++) ctx()->emit(OP_DUP_TOP, BC_NOARG, BC_KEEPLINE);
  606. for(int j=0; j<n; j++){
  607. auto e = ctx()->s_expr.popx();
  608. if(e->is_starred()) SyntaxError();
  609. bool ok = e->emit_store(ctx());
  610. if(!ok) SyntaxError();
  611. }
  612. } return true;
  613. default: return false;
  614. }
  615. }
  616. void Compiler::compile_stmt() {
  617. advance();
  618. int kw_line = prev().line; // backup line number
  619. switch(prev().type){
  620. case TK("break"):
  621. if (!ctx()->is_curr_block_loop()) SyntaxError("'break' outside loop");
  622. ctx()->emit(OP_LOOP_BREAK, BC_NOARG, kw_line);
  623. consume_end_stmt();
  624. break;
  625. case TK("continue"):
  626. if (!ctx()->is_curr_block_loop()) SyntaxError("'continue' not properly in loop");
  627. ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, kw_line);
  628. consume_end_stmt();
  629. break;
  630. case TK("yield"):
  631. if (contexts.size() <= 1) SyntaxError("'yield' outside function");
  632. EXPR_TUPLE(false);
  633. // if yield present, mark the function as generator
  634. ctx()->co->is_generator = true;
  635. ctx()->emit(OP_YIELD_VALUE, BC_NOARG, kw_line);
  636. consume_end_stmt();
  637. break;
  638. case TK("yield from"):
  639. if (contexts.size() <= 1) SyntaxError("'yield from' outside function");
  640. EXPR_TUPLE(false);
  641. // if yield from present, mark the function as generator
  642. ctx()->co->is_generator = true;
  643. ctx()->emit(OP_GET_ITER, BC_NOARG, kw_line);
  644. ctx()->enter_block(FOR_LOOP);
  645. ctx()->emit(OP_FOR_ITER, BC_NOARG, BC_KEEPLINE);
  646. ctx()->emit(OP_YIELD_VALUE, BC_NOARG, BC_KEEPLINE);
  647. ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, BC_KEEPLINE);
  648. ctx()->exit_block();
  649. consume_end_stmt();
  650. break;
  651. case TK("return"):
  652. if (contexts.size() <= 1) SyntaxError("'return' outside function");
  653. if(match_end_stmt()){
  654. ctx()->emit(OP_LOAD_NONE, BC_NOARG, kw_line);
  655. }else{
  656. EXPR_TUPLE(false);
  657. consume_end_stmt();
  658. }
  659. ctx()->emit(OP_RETURN_VALUE, BC_NOARG, kw_line);
  660. break;
  661. /*************************************************/
  662. case TK("if"): compile_if_stmt(); break;
  663. case TK("while"): compile_while_loop(); break;
  664. case TK("for"): compile_for_loop(); break;
  665. case TK("import"): compile_normal_import(); break;
  666. case TK("from"): compile_from_import(); break;
  667. case TK("def"): compile_function(); break;
  668. case TK("@"): compile_decorated(); break;
  669. case TK("try"): compile_try_except(); break;
  670. case TK("pass"): consume_end_stmt(); break;
  671. /*************************************************/
  672. case TK("++"):{
  673. consume(TK("@id"));
  674. StrName name(prev().sv());
  675. switch(name_scope()){
  676. case NAME_LOCAL:
  677. ctx()->emit(OP_INC_FAST, ctx()->add_varname(name), prev().line);
  678. break;
  679. case NAME_GLOBAL:
  680. ctx()->emit(OP_INC_GLOBAL, name.index, prev().line);
  681. break;
  682. default: SyntaxError(); break;
  683. }
  684. consume_end_stmt();
  685. break;
  686. }
  687. case TK("--"):{
  688. consume(TK("@id"));
  689. StrName name(prev().sv());
  690. switch(name_scope()){
  691. case NAME_LOCAL:
  692. ctx()->emit(OP_DEC_FAST, ctx()->add_varname(name), prev().line);
  693. break;
  694. case NAME_GLOBAL:
  695. ctx()->emit(OP_DEC_GLOBAL, name.index, prev().line);
  696. break;
  697. default: SyntaxError(); break;
  698. }
  699. consume_end_stmt();
  700. break;
  701. }
  702. case TK("assert"):
  703. EXPR_TUPLE(false);
  704. ctx()->emit(OP_ASSERT, BC_NOARG, kw_line);
  705. consume_end_stmt();
  706. break;
  707. case TK("global"):
  708. do {
  709. consume(TK("@id"));
  710. ctx()->global_names.insert(prev().str());
  711. } while (match(TK(",")));
  712. consume_end_stmt();
  713. break;
  714. case TK("raise"): {
  715. consume(TK("@id"));
  716. int dummy_t = StrName(prev().str()).index;
  717. if(match(TK("(")) && !match(TK(")"))){
  718. EXPR(false); consume(TK(")"));
  719. }else{
  720. ctx()->emit(OP_LOAD_NONE, BC_NOARG, kw_line);
  721. }
  722. ctx()->emit(OP_RAISE, dummy_t, kw_line);
  723. consume_end_stmt();
  724. } break;
  725. case TK("del"): {
  726. EXPR_TUPLE();
  727. Expr_ e = ctx()->s_expr.popx();
  728. bool ok = e->emit_del(ctx());
  729. if(!ok) SyntaxError();
  730. consume_end_stmt();
  731. } break;
  732. case TK("with"): {
  733. EXPR(false);
  734. consume(TK("as"));
  735. consume(TK("@id"));
  736. Expr_ e = make_expr<NameExpr>(prev().str(), name_scope());
  737. bool ok = e->emit_store(ctx());
  738. if(!ok) SyntaxError();
  739. e->emit(ctx());
  740. ctx()->emit(OP_WITH_ENTER, BC_NOARG, prev().line);
  741. compile_block_body();
  742. e->emit(ctx());
  743. ctx()->emit(OP_WITH_EXIT, BC_NOARG, prev().line);
  744. } break;
  745. /*************************************************/
  746. case TK("$label"): {
  747. if(mode()!=EXEC_MODE) SyntaxError("'label' is only available in EXEC_MODE");
  748. consume(TK("@id"));
  749. bool ok = ctx()->add_label(prev().str());
  750. if(!ok) SyntaxError("label " + prev().str().escape() + " already exists");
  751. consume_end_stmt();
  752. } break;
  753. case TK("$goto"):
  754. if(mode()!=EXEC_MODE) SyntaxError("'goto' is only available in EXEC_MODE");
  755. consume(TK("@id"));
  756. ctx()->emit(OP_GOTO, StrName(prev().str()).index, prev().line);
  757. consume_end_stmt();
  758. break;
  759. /*************************************************/
  760. // handle dangling expression or assignment
  761. default: {
  762. advance(-1); // do revert since we have pre-called advance() at the beginning
  763. EXPR_TUPLE();
  764. // eat variable's type hint
  765. if(match(TK(":"))) consume_type_hints();
  766. if(!try_compile_assignment()){
  767. if(!ctx()->s_expr.empty() && ctx()->s_expr.top()->is_starred()){
  768. SyntaxError();
  769. }
  770. ctx()->emit_expr();
  771. if((mode()==CELL_MODE || mode()==REPL_MODE) && name_scope()==NAME_GLOBAL){
  772. ctx()->emit(OP_PRINT_EXPR, BC_NOARG, BC_KEEPLINE);
  773. }else{
  774. ctx()->emit(OP_POP_TOP, BC_NOARG, BC_KEEPLINE);
  775. }
  776. }
  777. consume_end_stmt();
  778. }
  779. }
  780. }
  781. void Compiler::consume_type_hints(){
  782. EXPR();
  783. ctx()->s_expr.pop();
  784. }
  785. void Compiler::compile_class(){
  786. consume(TK("@id"));
  787. int namei = StrName(prev().str()).index;
  788. Expr_ base = nullptr;
  789. if(match(TK("("))){
  790. if(is_expression()){
  791. EXPR();
  792. base = ctx()->s_expr.popx();
  793. }
  794. consume(TK(")"));
  795. }
  796. if(base == nullptr){
  797. ctx()->emit(OP_LOAD_NONE, BC_NOARG, prev().line);
  798. }else {
  799. base->emit(ctx());
  800. }
  801. ctx()->emit(OP_BEGIN_CLASS, namei, BC_KEEPLINE);
  802. ctx()->is_compiling_class = true;
  803. compile_block_body();
  804. ctx()->is_compiling_class = false;
  805. ctx()->emit(OP_END_CLASS, BC_NOARG, BC_KEEPLINE);
  806. }
  807. void Compiler::_compile_f_args(FuncDecl_ decl, bool enable_type_hints){
  808. int state = 0; // 0 for args, 1 for *args, 2 for k=v, 3 for **kwargs
  809. do {
  810. if(state > 3) SyntaxError();
  811. if(state == 3) SyntaxError("**kwargs should be the last argument");
  812. match_newlines();
  813. if(match(TK("*"))){
  814. if(state < 1) state = 1;
  815. else SyntaxError("*args should be placed before **kwargs");
  816. }
  817. else if(match(TK("**"))){
  818. state = 3;
  819. }
  820. consume(TK("@id"));
  821. StrName name = prev().str();
  822. // check duplicate argument name
  823. for(int j: decl->args){
  824. if(decl->code->varnames[j] == name) {
  825. SyntaxError("duplicate argument name");
  826. }
  827. }
  828. for(auto& kv: decl->kwargs){
  829. if(decl->code->varnames[kv.key] == name){
  830. SyntaxError("duplicate argument name");
  831. }
  832. }
  833. if(decl->starred_arg!=-1 && decl->code->varnames[decl->starred_arg] == name){
  834. SyntaxError("duplicate argument name");
  835. }
  836. if(decl->starred_kwarg!=-1 && decl->code->varnames[decl->starred_kwarg] == name){
  837. SyntaxError("duplicate argument name");
  838. }
  839. // eat type hints
  840. if(enable_type_hints && match(TK(":"))) consume_type_hints();
  841. if(state == 0 && curr().type == TK("=")) state = 2;
  842. int index = ctx()->add_varname(name);
  843. switch (state)
  844. {
  845. case 0:
  846. decl->args.push_back(index);
  847. break;
  848. case 1:
  849. decl->starred_arg = index;
  850. state+=1;
  851. break;
  852. case 2: {
  853. consume(TK("="));
  854. PyObject* value = read_literal();
  855. if(value == nullptr){
  856. SyntaxError(Str("default argument must be a literal"));
  857. }
  858. decl->kwargs.push_back(FuncDecl::KwArg{index, value});
  859. } break;
  860. case 3:
  861. decl->starred_kwarg = index;
  862. state+=1;
  863. break;
  864. }
  865. } while (match(TK(",")));
  866. }
  867. void Compiler::compile_function(const std::vector<Expr_>& decorators){
  868. const char* _start = curr().start;
  869. consume(TK("@id"));
  870. Str decl_name = prev().str();
  871. FuncDecl_ decl = push_f_context(decl_name);
  872. consume(TK("("));
  873. if (!match(TK(")"))) {
  874. _compile_f_args(decl, true);
  875. consume(TK(")"));
  876. }
  877. if(match(TK("->"))) consume_type_hints();
  878. const char* _end = curr().start;
  879. decl->signature = Str(_start, _end-_start);
  880. compile_block_body();
  881. pop_context();
  882. PyObject* docstring = nullptr;
  883. if(decl->code->codes.size()>=2 && decl->code->codes[0].op == OP_LOAD_CONST && decl->code->codes[1].op == OP_POP_TOP){
  884. PyObject* c = decl->code->consts[decl->code->codes[0].arg];
  885. if(is_type(c, vm->tp_str)){
  886. decl->code->codes[0].op = OP_NO_OP;
  887. decl->code->codes[1].op = OP_NO_OP;
  888. docstring = c;
  889. }
  890. }
  891. if(docstring != nullptr){
  892. decl->docstring = PK_OBJ_GET(Str, docstring);
  893. }
  894. ctx()->emit(OP_LOAD_FUNCTION, ctx()->add_func_decl(decl), prev().line);
  895. // add decorators
  896. for(auto it=decorators.rbegin(); it!=decorators.rend(); ++it){
  897. (*it)->emit(ctx());
  898. ctx()->emit(OP_ROT_TWO, BC_NOARG, (*it)->line);
  899. ctx()->emit(OP_LOAD_NULL, BC_NOARG, BC_KEEPLINE);
  900. ctx()->emit(OP_ROT_TWO, BC_NOARG, BC_KEEPLINE);
  901. ctx()->emit(OP_CALL, 1, (*it)->line);
  902. }
  903. if(!ctx()->is_compiling_class){
  904. auto e = make_expr<NameExpr>(decl_name, name_scope());
  905. e->emit_store(ctx());
  906. }else{
  907. int index = StrName(decl_name).index;
  908. ctx()->emit(OP_STORE_CLASS_ATTR, index, prev().line);
  909. }
  910. }
  911. PyObject* Compiler::to_object(const TokenValue& value){
  912. PyObject* obj = nullptr;
  913. if(std::holds_alternative<i64>(value)){
  914. obj = VAR(std::get<i64>(value));
  915. }
  916. if(std::holds_alternative<f64>(value)){
  917. obj = VAR(std::get<f64>(value));
  918. }
  919. if(std::holds_alternative<Str>(value)){
  920. obj = VAR(std::get<Str>(value));
  921. }
  922. if(obj == nullptr) FATAL_ERROR();
  923. return obj;
  924. }
  925. PyObject* Compiler::read_literal(){
  926. advance();
  927. switch(prev().type){
  928. case TK("-"): {
  929. consume(TK("@num"));
  930. PyObject* val = to_object(prev().value);
  931. return vm->py_negate(val);
  932. }
  933. case TK("@num"): return to_object(prev().value);
  934. case TK("@str"): return to_object(prev().value);
  935. case TK("True"): return VAR(true);
  936. case TK("False"): return VAR(false);
  937. case TK("None"): return vm->None;
  938. case TK("..."): return vm->Ellipsis;
  939. default: break;
  940. }
  941. return nullptr;
  942. }
  943. Compiler::Compiler(VM* vm, const Str& source, const Str& filename, CompileMode mode, bool unknown_global_scope){
  944. this->vm = vm;
  945. this->used = false;
  946. this->unknown_global_scope = unknown_global_scope;
  947. this->lexer = std::make_unique<Lexer>(
  948. make_sp<SourceData>(source, filename, mode)
  949. );
  950. init_pratt_rules();
  951. }
  952. CodeObject_ Compiler::compile(){
  953. if(used) FATAL_ERROR();
  954. used = true;
  955. tokens = lexer->run();
  956. // if(lexer->src->filename == "<stdin>"){
  957. // for(auto& t: tokens) std::cout << t.info() << std::endl;
  958. // }
  959. CodeObject_ code = push_global_context();
  960. advance(); // skip @sof, so prev() is always valid
  961. match_newlines(); // skip possible leading '\n'
  962. if(mode()==EVAL_MODE) {
  963. EXPR_TUPLE(false);
  964. consume(TK("@eof"));
  965. ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  966. pop_context();
  967. return code;
  968. }else if(mode()==JSON_MODE){
  969. EXPR();
  970. Expr_ e = ctx()->s_expr.popx();
  971. if(!e->is_json_object()) SyntaxError("expect a JSON object, literal or array");
  972. consume(TK("@eof"));
  973. e->emit(ctx());
  974. ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  975. pop_context();
  976. return code;
  977. }
  978. while (!match(TK("@eof"))) {
  979. if (match(TK("class"))) {
  980. compile_class();
  981. } else {
  982. compile_stmt();
  983. }
  984. match_newlines();
  985. }
  986. pop_context();
  987. return code;
  988. }
  989. } // namespace pkpy