compiler.h 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055
  1. #pragma once
  2. #include "codeobject.h"
  3. #include "common.h"
  4. #include "expr.h"
  5. #include "obj.h"
  6. namespace pkpy{
  7. class Compiler;
  8. typedef void (Compiler::*PrattCallback)();
  9. struct PrattRule{
  10. PrattCallback prefix;
  11. PrattCallback infix;
  12. Precedence precedence;
  13. };
  14. class Compiler {
  15. inline static PrattRule rules[kTokenCount];
  16. std::unique_ptr<Lexer> lexer;
  17. stack<CodeEmitContext> contexts;
  18. VM* vm;
  19. bool unknown_global_scope; // for eval/exec() call
  20. bool used;
  21. // for parsing token stream
  22. int i = 0;
  23. std::vector<Token> tokens;
  24. const Token& prev() const{ return tokens.at(i-1); }
  25. const Token& curr() const{ return tokens.at(i); }
  26. const Token& next() const{ return tokens.at(i+1); }
  27. const Token& err() const{
  28. if(i >= tokens.size()) return prev();
  29. return curr();
  30. }
  31. void advance(int delta=1) { i += delta; }
  32. CodeEmitContext* ctx() { return &contexts.top(); }
  33. CompileMode mode() const{ return lexer->src->mode; }
  34. NameScope name_scope() const {
  35. auto s = contexts.size()>1 ? NAME_LOCAL : NAME_GLOBAL;
  36. if(unknown_global_scope && s == NAME_GLOBAL) s = NAME_GLOBAL_UNKNOWN;
  37. return s;
  38. }
  39. CodeObject_ push_global_context(){
  40. CodeObject_ co = make_sp<CodeObject>(lexer->src, lexer->src->filename);
  41. contexts.push(CodeEmitContext(vm, co, contexts.size()));
  42. return co;
  43. }
  44. FuncDecl_ push_f_context(Str name){
  45. FuncDecl_ decl = make_sp<FuncDecl>();
  46. decl->code = make_sp<CodeObject>(lexer->src, name);
  47. decl->nested = name_scope() == NAME_LOCAL;
  48. contexts.push(CodeEmitContext(vm, decl->code, contexts.size()));
  49. return decl;
  50. }
  51. void pop_context(){
  52. if(!ctx()->s_expr.empty()){
  53. throw std::runtime_error("!ctx()->s_expr.empty()\n" + ctx()->_log_s_expr());
  54. }
  55. // add a `return None` in the end as a guard
  56. // previously, we only do this if the last opcode is not a return
  57. // however, this is buggy...since there may be a jump to the end (out of bound) even if the last opcode is a return
  58. ctx()->emit(OP_LOAD_NONE, BC_NOARG, BC_KEEPLINE);
  59. ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  60. // ctx()->co->optimize(vm);
  61. if(ctx()->co->varnames.size() > PK_MAX_CO_VARNAMES){
  62. SyntaxError("maximum number of local variables exceeded");
  63. }
  64. contexts.pop();
  65. }
  66. static void init_pratt_rules(){
  67. if(rules[TK(".")].precedence != PREC_NONE) return;
  68. // http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
  69. #define METHOD(name) &Compiler::name
  70. #define NO_INFIX nullptr, PREC_NONE
  71. for(TokenIndex i=0; i<kTokenCount; i++) rules[i] = { nullptr, NO_INFIX };
  72. rules[TK(".")] = { nullptr, METHOD(exprAttrib), PREC_ATTRIB };
  73. rules[TK("(")] = { METHOD(exprGroup), METHOD(exprCall), PREC_CALL };
  74. rules[TK("[")] = { METHOD(exprList), METHOD(exprSubscr), PREC_SUBSCRIPT };
  75. rules[TK("{")] = { METHOD(exprMap), NO_INFIX };
  76. rules[TK("%")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  77. rules[TK("+")] = { nullptr, METHOD(exprBinaryOp), PREC_TERM };
  78. rules[TK("-")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_TERM };
  79. rules[TK("*")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_FACTOR };
  80. rules[TK("/")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  81. rules[TK("//")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  82. rules[TK("**")] = { nullptr, METHOD(exprBinaryOp), PREC_EXPONENT };
  83. rules[TK(">")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  84. rules[TK("<")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  85. rules[TK("==")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY };
  86. rules[TK("!=")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY };
  87. rules[TK(">=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  88. rules[TK("<=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  89. rules[TK("in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  90. rules[TK("is")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  91. rules[TK("<<")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  92. rules[TK(">>")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  93. rules[TK("&")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_AND };
  94. rules[TK("|")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_OR };
  95. rules[TK("^")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_XOR };
  96. rules[TK("@")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  97. rules[TK("if")] = { nullptr, METHOD(exprTernary), PREC_TERNARY };
  98. rules[TK(",")] = { nullptr, METHOD(exprTuple), PREC_TUPLE };
  99. rules[TK("not in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  100. rules[TK("is not")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  101. rules[TK("and") ] = { nullptr, METHOD(exprAnd), PREC_LOGICAL_AND };
  102. rules[TK("or")] = { nullptr, METHOD(exprOr), PREC_LOGICAL_OR };
  103. rules[TK("not")] = { METHOD(exprNot), nullptr, PREC_LOGICAL_NOT };
  104. rules[TK("True")] = { METHOD(exprLiteral0), NO_INFIX };
  105. rules[TK("False")] = { METHOD(exprLiteral0), NO_INFIX };
  106. rules[TK("None")] = { METHOD(exprLiteral0), NO_INFIX };
  107. rules[TK("...")] = { METHOD(exprLiteral0), NO_INFIX };
  108. rules[TK("lambda")] = { METHOD(exprLambda), NO_INFIX };
  109. rules[TK("@id")] = { METHOD(exprName), NO_INFIX };
  110. rules[TK("@num")] = { METHOD(exprLiteral), NO_INFIX };
  111. rules[TK("@str")] = { METHOD(exprLiteral), NO_INFIX };
  112. rules[TK("@fstr")] = { METHOD(exprFString), NO_INFIX };
  113. #undef METHOD
  114. #undef NO_INFIX
  115. }
  116. bool match(TokenIndex expected) {
  117. if (curr().type != expected) return false;
  118. advance();
  119. return true;
  120. }
  121. void consume(TokenIndex expected) {
  122. if (!match(expected)){
  123. SyntaxError(
  124. fmt("expected '", TK_STR(expected), "', but got '", TK_STR(curr().type), "'")
  125. );
  126. }
  127. }
  128. bool match_newlines_repl(){
  129. return match_newlines(mode()==REPL_MODE);
  130. }
  131. bool match_newlines(bool repl_throw=false) {
  132. bool consumed = false;
  133. if (curr().type == TK("@eol")) {
  134. while (curr().type == TK("@eol")) advance();
  135. consumed = true;
  136. }
  137. if (repl_throw && curr().type == TK("@eof")){
  138. throw NeedMoreLines(ctx()->is_compiling_class);
  139. }
  140. return consumed;
  141. }
  142. bool match_end_stmt() {
  143. if (match(TK(";"))) { match_newlines(); return true; }
  144. if (match_newlines() || curr().type == TK("@eof")) return true;
  145. if (curr().type == TK("@dedent")) return true;
  146. return false;
  147. }
  148. void consume_end_stmt() {
  149. if (!match_end_stmt()) SyntaxError("expected statement end");
  150. }
  151. /*************************************************/
  152. void EXPR(bool push_stack=true) {
  153. parse_expression(PREC_TUPLE+1, push_stack);
  154. }
  155. void EXPR_TUPLE(bool push_stack=true) {
  156. parse_expression(PREC_TUPLE, push_stack);
  157. }
  158. // special case for `for loop` and `comp`
  159. Expr_ EXPR_VARS(){
  160. std::vector<Expr_> items;
  161. do {
  162. consume(TK("@id"));
  163. items.push_back(make_expr<NameExpr>(prev().str(), name_scope()));
  164. } while(match(TK(",")));
  165. if(items.size()==1) return std::move(items[0]);
  166. return make_expr<TupleExpr>(std::move(items));
  167. }
  168. template <typename T, typename... Args>
  169. std::unique_ptr<T> make_expr(Args&&... args) {
  170. std::unique_ptr<T> expr = std::make_unique<T>(std::forward<Args>(args)...);
  171. expr->line = prev().line;
  172. return expr;
  173. }
  174. void exprLiteral(){
  175. ctx()->s_expr.push(make_expr<LiteralExpr>(prev().value));
  176. }
  177. void exprFString(){
  178. ctx()->s_expr.push(make_expr<FStringExpr>(std::get<Str>(prev().value)));
  179. }
  180. void exprLambda(){
  181. FuncDecl_ decl = push_f_context("<lambda>");
  182. auto e = make_expr<LambdaExpr>(decl);
  183. if(!match(TK(":"))){
  184. _compile_f_args(e->decl, false);
  185. consume(TK(":"));
  186. }
  187. // https://github.com/blueloveTH/pocketpy/issues/37
  188. parse_expression(PREC_LAMBDA + 1, false);
  189. ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  190. pop_context();
  191. ctx()->s_expr.push(std::move(e));
  192. }
  193. void exprTuple(){
  194. std::vector<Expr_> items;
  195. items.push_back(ctx()->s_expr.popx());
  196. do {
  197. EXPR(); // NOTE: "1," will fail, "1,2" will be ok
  198. items.push_back(ctx()->s_expr.popx());
  199. } while(match(TK(",")));
  200. ctx()->s_expr.push(make_expr<TupleExpr>(
  201. std::move(items)
  202. ));
  203. }
  204. void exprOr(){
  205. auto e = make_expr<OrExpr>();
  206. e->lhs = ctx()->s_expr.popx();
  207. parse_expression(PREC_LOGICAL_OR + 1);
  208. e->rhs = ctx()->s_expr.popx();
  209. ctx()->s_expr.push(std::move(e));
  210. }
  211. void exprAnd(){
  212. auto e = make_expr<AndExpr>();
  213. e->lhs = ctx()->s_expr.popx();
  214. parse_expression(PREC_LOGICAL_AND + 1);
  215. e->rhs = ctx()->s_expr.popx();
  216. ctx()->s_expr.push(std::move(e));
  217. }
  218. void exprTernary(){
  219. auto e = make_expr<TernaryExpr>();
  220. e->true_expr = ctx()->s_expr.popx();
  221. // cond
  222. parse_expression(PREC_TERNARY + 1);
  223. e->cond = ctx()->s_expr.popx();
  224. consume(TK("else"));
  225. // if false
  226. parse_expression(PREC_TERNARY + 1);
  227. e->false_expr = ctx()->s_expr.popx();
  228. ctx()->s_expr.push(std::move(e));
  229. }
  230. void exprBinaryOp(){
  231. auto e = make_expr<BinaryExpr>();
  232. e->op = prev().type;
  233. e->lhs = ctx()->s_expr.popx();
  234. parse_expression(rules[e->op].precedence + 1);
  235. e->rhs = ctx()->s_expr.popx();
  236. ctx()->s_expr.push(std::move(e));
  237. }
  238. void exprNot() {
  239. parse_expression(PREC_LOGICAL_NOT + 1);
  240. ctx()->s_expr.push(make_expr<NotExpr>(ctx()->s_expr.popx()));
  241. }
  242. void exprUnaryOp(){
  243. TokenIndex op = prev().type;
  244. parse_expression(PREC_UNARY + 1);
  245. switch(op){
  246. case TK("-"):
  247. ctx()->s_expr.push(make_expr<NegatedExpr>(ctx()->s_expr.popx()));
  248. break;
  249. case TK("*"):
  250. ctx()->s_expr.push(make_expr<StarredExpr>(ctx()->s_expr.popx()));
  251. break;
  252. default: FATAL_ERROR();
  253. }
  254. }
  255. void exprGroup(){
  256. match_newlines_repl();
  257. EXPR_TUPLE(); // () is just for change precedence
  258. match_newlines_repl();
  259. consume(TK(")"));
  260. }
  261. template<typename T>
  262. void _consume_comp(Expr_ expr){
  263. static_assert(std::is_base_of<CompExpr, T>::value);
  264. std::unique_ptr<CompExpr> ce = make_expr<T>();
  265. ce->expr = std::move(expr);
  266. ce->vars = EXPR_VARS();
  267. consume(TK("in"));
  268. parse_expression(PREC_TERNARY + 1);
  269. ce->iter = ctx()->s_expr.popx();
  270. match_newlines_repl();
  271. if(match(TK("if"))){
  272. parse_expression(PREC_TERNARY + 1);
  273. ce->cond = ctx()->s_expr.popx();
  274. }
  275. ctx()->s_expr.push(std::move(ce));
  276. match_newlines_repl();
  277. }
  278. void exprList() {
  279. int line = prev().line;
  280. std::vector<Expr_> items;
  281. do {
  282. match_newlines_repl();
  283. if (curr().type == TK("]")) break;
  284. EXPR();
  285. items.push_back(ctx()->s_expr.popx());
  286. match_newlines_repl();
  287. if(items.size()==1 && match(TK("for"))){
  288. _consume_comp<ListCompExpr>(std::move(items[0]));
  289. consume(TK("]"));
  290. return;
  291. }
  292. match_newlines_repl();
  293. } while (match(TK(",")));
  294. consume(TK("]"));
  295. auto e = make_expr<ListExpr>(std::move(items));
  296. e->line = line; // override line
  297. ctx()->s_expr.push(std::move(e));
  298. }
  299. void exprMap() {
  300. bool parsing_dict = false; // {...} may be dict or set
  301. std::vector<Expr_> items;
  302. do {
  303. match_newlines_repl();
  304. if (curr().type == TK("}")) break;
  305. EXPR();
  306. if(curr().type == TK(":")) parsing_dict = true;
  307. if(parsing_dict){
  308. consume(TK(":"));
  309. EXPR();
  310. auto dict_item = make_expr<DictItemExpr>();
  311. dict_item->key = ctx()->s_expr.popx();
  312. dict_item->value = ctx()->s_expr.popx();
  313. items.push_back(std::move(dict_item));
  314. }else{
  315. items.push_back(ctx()->s_expr.popx());
  316. }
  317. match_newlines_repl();
  318. if(items.size()==1 && match(TK("for"))){
  319. if(parsing_dict) _consume_comp<DictCompExpr>(std::move(items[0]));
  320. else _consume_comp<SetCompExpr>(std::move(items[0]));
  321. consume(TK("}"));
  322. return;
  323. }
  324. match_newlines_repl();
  325. } while (match(TK(",")));
  326. consume(TK("}"));
  327. if(items.size()==0 || parsing_dict){
  328. auto e = make_expr<DictExpr>(std::move(items));
  329. ctx()->s_expr.push(std::move(e));
  330. }else{
  331. auto e = make_expr<SetExpr>(std::move(items));
  332. ctx()->s_expr.push(std::move(e));
  333. }
  334. }
  335. void exprCall() {
  336. auto e = make_expr<CallExpr>();
  337. e->callable = ctx()->s_expr.popx();
  338. do {
  339. match_newlines_repl();
  340. if (curr().type==TK(")")) break;
  341. if(curr().type==TK("@id") && next().type==TK("=")) {
  342. consume(TK("@id"));
  343. Str key = prev().str();
  344. consume(TK("="));
  345. EXPR();
  346. e->kwargs.push_back({key, ctx()->s_expr.popx()});
  347. } else{
  348. if(!e->kwargs.empty()) SyntaxError("positional argument follows keyword argument");
  349. EXPR();
  350. e->args.push_back(ctx()->s_expr.popx());
  351. }
  352. match_newlines_repl();
  353. } while (match(TK(",")));
  354. consume(TK(")"));
  355. if(e->args.size() > 32767) SyntaxError("too many positional arguments");
  356. if(e->kwargs.size() > 32767) SyntaxError("too many keyword arguments");
  357. ctx()->s_expr.push(std::move(e));
  358. }
  359. void exprName(){
  360. Str name = prev().str();
  361. NameScope scope = name_scope();
  362. if(ctx()->co->global_names.count(name)){
  363. scope = NAME_GLOBAL;
  364. }
  365. ctx()->s_expr.push(make_expr<NameExpr>(name, scope));
  366. }
  367. void exprAttrib() {
  368. consume(TK("@id"));
  369. ctx()->s_expr.push(
  370. make_expr<AttribExpr>(ctx()->s_expr.popx(), prev().str())
  371. );
  372. }
  373. void exprSubscr() {
  374. auto e = make_expr<SubscrExpr>();
  375. e->a = ctx()->s_expr.popx();
  376. auto slice = make_expr<SliceExpr>();
  377. bool is_slice = false;
  378. // a[<0> <state:1> : state<3> : state<5>]
  379. int state = 0;
  380. do{
  381. switch(state){
  382. case 0:
  383. if(match(TK(":"))){
  384. is_slice=true;
  385. state=2;
  386. break;
  387. }
  388. if(match(TK("]"))) SyntaxError();
  389. EXPR_TUPLE();
  390. slice->start = ctx()->s_expr.popx();
  391. state=1;
  392. break;
  393. case 1:
  394. if(match(TK(":"))){
  395. is_slice=true;
  396. state=2;
  397. break;
  398. }
  399. if(match(TK("]"))) goto __SUBSCR_END;
  400. SyntaxError("expected ':' or ']'");
  401. break;
  402. case 2:
  403. if(match(TK(":"))){
  404. state=4;
  405. break;
  406. }
  407. if(match(TK("]"))) goto __SUBSCR_END;
  408. EXPR_TUPLE();
  409. slice->stop = ctx()->s_expr.popx();
  410. state=3;
  411. break;
  412. case 3:
  413. if(match(TK(":"))){
  414. state=4;
  415. break;
  416. }
  417. if(match(TK("]"))) goto __SUBSCR_END;
  418. SyntaxError("expected ':' or ']'");
  419. break;
  420. case 4:
  421. if(match(TK("]"))) goto __SUBSCR_END;
  422. EXPR_TUPLE();
  423. slice->step = ctx()->s_expr.popx();
  424. state=5;
  425. break;
  426. case 5: consume(TK("]")); goto __SUBSCR_END;
  427. }
  428. }while(true);
  429. __SUBSCR_END:
  430. if(is_slice){
  431. e->b = std::move(slice);
  432. }else{
  433. if(state != 1) FATAL_ERROR();
  434. e->b = std::move(slice->start);
  435. }
  436. ctx()->s_expr.push(std::move(e));
  437. }
  438. void exprLiteral0() {
  439. ctx()->s_expr.push(make_expr<Literal0Expr>(prev().type));
  440. }
  441. void compile_block_body() {
  442. consume(TK(":"));
  443. if(curr().type!=TK("@eol") && curr().type!=TK("@eof")){
  444. compile_stmt(); // inline block
  445. return;
  446. }
  447. if(!match_newlines(mode()==REPL_MODE)){
  448. SyntaxError("expected a new line after ':'");
  449. }
  450. consume(TK("@indent"));
  451. while (curr().type != TK("@dedent")) {
  452. match_newlines();
  453. compile_stmt();
  454. match_newlines();
  455. }
  456. consume(TK("@dedent"));
  457. }
  458. Str _compile_import() {
  459. if(name_scope() != NAME_GLOBAL) SyntaxError("import statement should be used in global scope");
  460. consume(TK("@id"));
  461. Str name = prev().str();
  462. ctx()->emit(OP_IMPORT_NAME, StrName(name).index, prev().line);
  463. return name;
  464. }
  465. // import a as b
  466. void compile_normal_import() {
  467. do {
  468. Str name = _compile_import();
  469. if (match(TK("as"))) {
  470. consume(TK("@id"));
  471. name = prev().str();
  472. }
  473. ctx()->emit(OP_STORE_GLOBAL, StrName(name).index, prev().line);
  474. } while (match(TK(",")));
  475. consume_end_stmt();
  476. }
  477. // from a import b as c, d as e
  478. void compile_from_import() {
  479. _compile_import();
  480. consume(TK("import"));
  481. if (match(TK("*"))) {
  482. ctx()->emit(OP_IMPORT_STAR, BC_NOARG, prev().line);
  483. consume_end_stmt();
  484. return;
  485. }
  486. do {
  487. ctx()->emit(OP_DUP_TOP, BC_NOARG, BC_KEEPLINE);
  488. consume(TK("@id"));
  489. Str name = prev().str();
  490. ctx()->emit(OP_LOAD_ATTR, StrName(name).index, prev().line);
  491. if (match(TK("as"))) {
  492. consume(TK("@id"));
  493. name = prev().str();
  494. }
  495. ctx()->emit(OP_STORE_GLOBAL, StrName(name).index, prev().line);
  496. } while (match(TK(",")));
  497. ctx()->emit(OP_POP_TOP, BC_NOARG, BC_KEEPLINE);
  498. consume_end_stmt();
  499. }
  500. void parse_expression(int precedence, bool push_stack=true) {
  501. advance();
  502. PrattCallback prefix = rules[prev().type].prefix;
  503. if (prefix == nullptr) SyntaxError(Str("expected an expression, but got ") + TK_STR(prev().type));
  504. (this->*prefix)();
  505. while (rules[curr().type].precedence >= precedence) {
  506. TokenIndex op = curr().type;
  507. advance();
  508. PrattCallback infix = rules[op].infix;
  509. if(infix == nullptr) throw std::runtime_error("(infix == nullptr) is true");
  510. (this->*infix)();
  511. }
  512. if(!push_stack) ctx()->emit_expr();
  513. }
  514. void compile_if_stmt() {
  515. EXPR(false); // condition
  516. int patch = ctx()->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, prev().line);
  517. compile_block_body();
  518. if (match(TK("elif"))) {
  519. int exit_patch = ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, prev().line);
  520. ctx()->patch_jump(patch);
  521. compile_if_stmt();
  522. ctx()->patch_jump(exit_patch);
  523. } else if (match(TK("else"))) {
  524. int exit_patch = ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, prev().line);
  525. ctx()->patch_jump(patch);
  526. compile_block_body();
  527. ctx()->patch_jump(exit_patch);
  528. } else {
  529. ctx()->patch_jump(patch);
  530. }
  531. }
  532. void compile_while_loop() {
  533. ctx()->enter_block(WHILE_LOOP);
  534. EXPR(false); // condition
  535. int patch = ctx()->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, prev().line);
  536. compile_block_body();
  537. ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, BC_KEEPLINE);
  538. ctx()->patch_jump(patch);
  539. ctx()->exit_block();
  540. }
  541. void compile_for_loop() {
  542. Expr_ vars = EXPR_VARS();
  543. consume(TK("in"));
  544. EXPR_TUPLE(false);
  545. ctx()->emit(OP_GET_ITER, BC_NOARG, BC_KEEPLINE);
  546. ctx()->enter_block(FOR_LOOP);
  547. ctx()->emit(OP_FOR_ITER, BC_NOARG, BC_KEEPLINE);
  548. bool ok = vars->emit_store(ctx());
  549. if(!ok) SyntaxError(); // this error occurs in `vars` instead of this line, but...nevermind
  550. compile_block_body();
  551. ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, BC_KEEPLINE);
  552. ctx()->exit_block();
  553. }
  554. void compile_try_except() {
  555. ctx()->enter_block(TRY_EXCEPT);
  556. compile_block_body();
  557. std::vector<int> patches = {
  558. ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, BC_KEEPLINE)
  559. };
  560. ctx()->exit_block();
  561. do {
  562. consume(TK("except"));
  563. if(match(TK("@id"))){
  564. ctx()->emit(OP_EXCEPTION_MATCH, StrName(prev().str()).index, prev().line);
  565. }else{
  566. ctx()->emit(OP_LOAD_TRUE, BC_NOARG, BC_KEEPLINE);
  567. }
  568. int patch = ctx()->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, BC_KEEPLINE);
  569. // pop the exception on match
  570. ctx()->emit(OP_POP_TOP, BC_NOARG, BC_KEEPLINE);
  571. compile_block_body();
  572. patches.push_back(ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, BC_KEEPLINE));
  573. ctx()->patch_jump(patch);
  574. }while(curr().type == TK("except"));
  575. // no match, re-raise
  576. ctx()->emit(OP_RE_RAISE, BC_NOARG, BC_KEEPLINE);
  577. for (int patch : patches) ctx()->patch_jump(patch);
  578. }
  579. void compile_decorated(){
  580. std::vector<Expr_> decorators;
  581. do{
  582. EXPR();
  583. decorators.push_back(ctx()->s_expr.popx());
  584. if(!match_newlines_repl()) SyntaxError();
  585. }while(match(TK("@")));
  586. consume(TK("def"));
  587. compile_function(decorators);
  588. }
  589. bool try_compile_assignment(){
  590. Expr* lhs_p = ctx()->s_expr.top().get();
  591. bool inplace;
  592. switch (curr().type) {
  593. case TK("+="): case TK("-="): case TK("*="): case TK("/="): case TK("//="): case TK("%="):
  594. case TK("<<="): case TK(">>="): case TK("&="): case TK("|="): case TK("^="): {
  595. if(ctx()->is_compiling_class) SyntaxError();
  596. inplace = true;
  597. advance();
  598. auto e = make_expr<BinaryExpr>();
  599. e->op = prev().type - 1; // -1 to remove =
  600. e->lhs = ctx()->s_expr.popx();
  601. EXPR_TUPLE();
  602. e->rhs = ctx()->s_expr.popx();
  603. ctx()->s_expr.push(std::move(e));
  604. } break;
  605. case TK("="):
  606. inplace = false;
  607. advance();
  608. EXPR_TUPLE();
  609. break;
  610. default: return false;
  611. }
  612. // std::cout << ctx()->_log_s_expr() << std::endl;
  613. Expr_ rhs = ctx()->s_expr.popx();
  614. if(lhs_p->is_starred() || rhs->is_starred()){
  615. SyntaxError("can't use starred expression here");
  616. }
  617. rhs->emit(ctx());
  618. bool ok = lhs_p->emit_store(ctx());
  619. if(!ok) SyntaxError();
  620. if(!inplace) ctx()->s_expr.pop();
  621. return true;
  622. }
  623. void compile_stmt() {
  624. advance();
  625. int kw_line = prev().line; // backup line number
  626. switch(prev().type){
  627. case TK("break"):
  628. if (!ctx()->is_curr_block_loop()) SyntaxError("'break' outside loop");
  629. ctx()->emit(OP_LOOP_BREAK, BC_NOARG, kw_line);
  630. consume_end_stmt();
  631. break;
  632. case TK("continue"):
  633. if (!ctx()->is_curr_block_loop()) SyntaxError("'continue' not properly in loop");
  634. ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, kw_line);
  635. consume_end_stmt();
  636. break;
  637. case TK("yield"):
  638. if (contexts.size() <= 1) SyntaxError("'yield' outside function");
  639. EXPR_TUPLE(false);
  640. // if yield present, mark the function as generator
  641. ctx()->co->is_generator = true;
  642. ctx()->emit(OP_YIELD_VALUE, BC_NOARG, kw_line);
  643. consume_end_stmt();
  644. break;
  645. case TK("yield from"):
  646. if (contexts.size() <= 1) SyntaxError("'yield from' outside function");
  647. EXPR_TUPLE(false);
  648. // if yield from present, mark the function as generator
  649. ctx()->co->is_generator = true;
  650. ctx()->emit(OP_GET_ITER, BC_NOARG, kw_line);
  651. ctx()->enter_block(FOR_LOOP);
  652. ctx()->emit(OP_FOR_ITER, BC_NOARG, BC_KEEPLINE);
  653. ctx()->emit(OP_YIELD_VALUE, BC_NOARG, BC_KEEPLINE);
  654. ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, BC_KEEPLINE);
  655. ctx()->exit_block();
  656. consume_end_stmt();
  657. break;
  658. case TK("return"):
  659. if (contexts.size() <= 1) SyntaxError("'return' outside function");
  660. if(match_end_stmt()){
  661. ctx()->emit(OP_LOAD_NONE, BC_NOARG, kw_line);
  662. }else{
  663. EXPR_TUPLE(false);
  664. consume_end_stmt();
  665. }
  666. ctx()->emit(OP_RETURN_VALUE, BC_NOARG, kw_line);
  667. break;
  668. /*************************************************/
  669. case TK("if"): compile_if_stmt(); break;
  670. case TK("while"): compile_while_loop(); break;
  671. case TK("for"): compile_for_loop(); break;
  672. case TK("import"): compile_normal_import(); break;
  673. case TK("from"): compile_from_import(); break;
  674. case TK("def"): compile_function(); break;
  675. case TK("@"): compile_decorated(); break;
  676. case TK("try"): compile_try_except(); break;
  677. case TK("pass"): consume_end_stmt(); break;
  678. /*************************************************/
  679. case TK("assert"):
  680. EXPR_TUPLE(false);
  681. ctx()->emit(OP_ASSERT, BC_NOARG, kw_line);
  682. consume_end_stmt();
  683. break;
  684. case TK("global"):
  685. do {
  686. consume(TK("@id"));
  687. ctx()->co->global_names.insert(prev().str());
  688. } while (match(TK(",")));
  689. consume_end_stmt();
  690. break;
  691. case TK("raise"): {
  692. consume(TK("@id"));
  693. int dummy_t = StrName(prev().str()).index;
  694. if(match(TK("(")) && !match(TK(")"))){
  695. EXPR(false); consume(TK(")"));
  696. }else{
  697. ctx()->emit(OP_LOAD_NONE, BC_NOARG, BC_KEEPLINE);
  698. }
  699. ctx()->emit(OP_RAISE, dummy_t, kw_line);
  700. consume_end_stmt();
  701. } break;
  702. case TK("del"): {
  703. EXPR_TUPLE();
  704. Expr_ e = ctx()->s_expr.popx();
  705. bool ok = e->emit_del(ctx());
  706. if(!ok) SyntaxError();
  707. consume_end_stmt();
  708. } break;
  709. case TK("with"): {
  710. EXPR(false);
  711. consume(TK("as"));
  712. consume(TK("@id"));
  713. StrName name(prev().str());
  714. ctx()->emit(OP_STORE_NAME, name.index, prev().line);
  715. ctx()->emit(OP_LOAD_NAME, name.index, prev().line);
  716. ctx()->emit(OP_WITH_ENTER, BC_NOARG, prev().line);
  717. compile_block_body();
  718. ctx()->emit(OP_LOAD_NAME, name.index, prev().line);
  719. ctx()->emit(OP_WITH_EXIT, BC_NOARG, prev().line);
  720. } break;
  721. /*************************************************/
  722. case TK("$label"): {
  723. if(mode()!=EXEC_MODE) SyntaxError("'label' is only available in EXEC_MODE");
  724. consume(TK("@id"));
  725. bool ok = ctx()->add_label(prev().str());
  726. if(!ok) SyntaxError("label " + prev().str().escape() + " already exists");
  727. consume_end_stmt();
  728. } break;
  729. case TK("$goto"):
  730. if(mode()!=EXEC_MODE) SyntaxError("'goto' is only available in EXEC_MODE");
  731. consume(TK("@id"));
  732. ctx()->emit(OP_GOTO, StrName(prev().str()).index, prev().line);
  733. consume_end_stmt();
  734. break;
  735. /*************************************************/
  736. // handle dangling expression or assignment
  737. default: {
  738. advance(-1); // do revert since we have pre-called advance() at the beginning
  739. EXPR_TUPLE();
  740. if(!try_compile_assignment()){
  741. ctx()->emit_expr();
  742. if((mode()==CELL_MODE || mode()==REPL_MODE) && name_scope()==NAME_GLOBAL){
  743. ctx()->emit(OP_PRINT_EXPR, BC_NOARG, BC_KEEPLINE);
  744. }else{
  745. ctx()->emit(OP_POP_TOP, BC_NOARG, BC_KEEPLINE);
  746. }
  747. }
  748. consume_end_stmt();
  749. }
  750. }
  751. }
  752. void compile_class(){
  753. consume(TK("@id"));
  754. int namei = StrName(prev().str()).index;
  755. int super_namei = -1;
  756. if(match(TK("(")) && match(TK("@id"))){
  757. super_namei = StrName(prev().str()).index;
  758. consume(TK(")"));
  759. }
  760. if(super_namei == -1) ctx()->emit(OP_LOAD_NONE, BC_NOARG, prev().line);
  761. else ctx()->emit(OP_LOAD_GLOBAL, super_namei, prev().line);
  762. ctx()->emit(OP_BEGIN_CLASS, namei, BC_KEEPLINE);
  763. ctx()->is_compiling_class = true;
  764. compile_block_body();
  765. ctx()->is_compiling_class = false;
  766. ctx()->emit(OP_END_CLASS, BC_NOARG, BC_KEEPLINE);
  767. }
  768. void _compile_f_args(FuncDecl_ decl, bool enable_type_hints){
  769. int state = 0; // 0 for args, 1 for *args, 2 for k=v, 3 for **kwargs
  770. do {
  771. if(state == 3) SyntaxError("**kwargs should be the last argument");
  772. match_newlines();
  773. if(match(TK("*"))){
  774. if(state < 1) state = 1;
  775. else SyntaxError("*args should be placed before **kwargs");
  776. }
  777. else if(match(TK("**"))){
  778. state = 3;
  779. }
  780. consume(TK("@id"));
  781. StrName name = prev().str();
  782. // check duplicate argument name
  783. for(int i: decl->args){
  784. if(decl->code->varnames[i] == name) {
  785. SyntaxError("duplicate argument name");
  786. }
  787. }
  788. if(decl->starred_arg!=-1 && decl->code->varnames[decl->starred_arg] == name){
  789. SyntaxError("duplicate argument name");
  790. }
  791. for(auto& kv: decl->kwargs){
  792. if(decl->code->varnames[kv.key] == name){
  793. SyntaxError("duplicate argument name");
  794. }
  795. }
  796. // eat type hints
  797. if(enable_type_hints && match(TK(":"))) consume(TK("@id"));
  798. if(state == 0 && curr().type == TK("=")) state = 2;
  799. int index = ctx()->add_varname(name);
  800. switch (state)
  801. {
  802. case 0:
  803. decl->args.push_back(index);
  804. break;
  805. case 1:
  806. decl->starred_arg = index;
  807. state+=1;
  808. break;
  809. case 2: {
  810. consume(TK("="));
  811. PyObject* value = read_literal();
  812. if(value == nullptr){
  813. SyntaxError(Str("expect a literal, not ") + TK_STR(curr().type));
  814. }
  815. decl->kwargs.push_back(FuncDecl::KwArg{index, value});
  816. } break;
  817. case 3: SyntaxError("**kwargs is not supported yet"); break;
  818. }
  819. } while (match(TK(",")));
  820. }
  821. void compile_function(const std::vector<Expr_>& decorators={}){
  822. Str obj_name;
  823. Str decl_name;
  824. consume(TK("@id"));
  825. decl_name = prev().str();
  826. if(!ctx()->is_compiling_class && match(TK("@"))){
  827. consume(TK("@id"));
  828. obj_name = decl_name;
  829. decl_name = prev().str();
  830. }
  831. FuncDecl_ decl = push_f_context(decl_name);
  832. consume(TK("("));
  833. if (!match(TK(")"))) {
  834. _compile_f_args(decl, true);
  835. consume(TK(")"));
  836. }
  837. if(match(TK("->"))){
  838. if(!match(TK("None"))) consume(TK("@id"));
  839. }
  840. compile_block_body();
  841. pop_context();
  842. PyObject* docstring = nullptr;
  843. if(decl->code->codes.size()>=2 && decl->code->codes[0].op == OP_LOAD_CONST && decl->code->codes[1].op == OP_POP_TOP){
  844. PyObject* c = decl->code->consts[decl->code->codes[0].arg];
  845. if(is_type(c, vm->tp_str)){
  846. decl->code->codes[0].op = OP_NO_OP;
  847. decl->code->codes[1].op = OP_NO_OP;
  848. docstring = c;
  849. }
  850. }
  851. ctx()->emit(OP_LOAD_FUNCTION, ctx()->add_func_decl(decl), prev().line);
  852. if(docstring != nullptr){
  853. ctx()->emit(OP_SETUP_DOCSTRING, ctx()->add_const(docstring), prev().line);
  854. }
  855. // add decorators
  856. for(auto it=decorators.rbegin(); it!=decorators.rend(); ++it){
  857. (*it)->emit(ctx());
  858. ctx()->emit(OP_ROT_TWO, BC_NOARG, (*it)->line);
  859. ctx()->emit(OP_LOAD_NULL, BC_NOARG, BC_KEEPLINE);
  860. ctx()->emit(OP_ROT_TWO, BC_NOARG, BC_KEEPLINE);
  861. ctx()->emit(OP_CALL, 1, (*it)->line);
  862. }
  863. if(!ctx()->is_compiling_class){
  864. if(obj_name.empty()){
  865. auto e = make_expr<NameExpr>(decl_name, name_scope());
  866. e->emit_store(ctx());
  867. } else {
  868. ctx()->emit(OP_LOAD_GLOBAL, StrName(obj_name).index, prev().line);
  869. int index = StrName(decl_name).index;
  870. ctx()->emit(OP_STORE_ATTR, index, prev().line);
  871. }
  872. }else{
  873. int index = StrName(decl_name).index;
  874. ctx()->emit(OP_STORE_CLASS_ATTR, index, prev().line);
  875. }
  876. }
  877. PyObject* to_object(const TokenValue& value){
  878. PyObject* obj = nullptr;
  879. if(std::holds_alternative<i64>(value)){
  880. obj = VAR(std::get<i64>(value));
  881. }
  882. if(std::holds_alternative<f64>(value)){
  883. obj = VAR(std::get<f64>(value));
  884. }
  885. if(std::holds_alternative<Str>(value)){
  886. obj = VAR(std::get<Str>(value));
  887. }
  888. if(obj == nullptr) FATAL_ERROR();
  889. return obj;
  890. }
  891. PyObject* read_literal(){
  892. advance();
  893. switch(prev().type){
  894. case TK("-"): {
  895. consume(TK("@num"));
  896. PyObject* val = to_object(prev().value);
  897. return vm->py_negate(val);
  898. }
  899. case TK("@num"): return to_object(prev().value);
  900. case TK("@str"): return to_object(prev().value);
  901. case TK("True"): return VAR(true);
  902. case TK("False"): return VAR(false);
  903. case TK("None"): return vm->None;
  904. case TK("..."): return vm->Ellipsis;
  905. default: break;
  906. }
  907. return nullptr;
  908. }
  909. void SyntaxError(Str msg){ lexer->throw_err("SyntaxError", msg, err().line, err().start); }
  910. void SyntaxError(){ lexer->throw_err("SyntaxError", "invalid syntax", err().line, err().start); }
  911. void IndentationError(Str msg){ lexer->throw_err("IndentationError", msg, err().line, err().start); }
  912. public:
  913. Compiler(VM* vm, const Str& source, const Str& filename, CompileMode mode, bool unknown_global_scope=false){
  914. this->vm = vm;
  915. this->used = false;
  916. this->unknown_global_scope = unknown_global_scope;
  917. this->lexer = std::make_unique<Lexer>(
  918. make_sp<SourceData>(source, filename, mode)
  919. );
  920. init_pratt_rules();
  921. }
  922. CodeObject_ compile(){
  923. if(used) FATAL_ERROR();
  924. used = true;
  925. tokens = lexer->run();
  926. // if(lexer->src->filename == "<stdin>"){
  927. // for(auto& t: tokens) std::cout << t.info() << std::endl;
  928. // }
  929. CodeObject_ code = push_global_context();
  930. advance(); // skip @sof, so prev() is always valid
  931. match_newlines(); // skip possible leading '\n'
  932. if(mode()==EVAL_MODE) {
  933. EXPR_TUPLE(false);
  934. consume(TK("@eof"));
  935. ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  936. pop_context();
  937. return code;
  938. }else if(mode()==JSON_MODE){
  939. EXPR();
  940. Expr_ e = ctx()->s_expr.popx();
  941. if(!e->is_json_object()) SyntaxError("expect a JSON object, literal or array");
  942. consume(TK("@eof"));
  943. e->emit(ctx());
  944. ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  945. pop_context();
  946. return code;
  947. }
  948. while (!match(TK("@eof"))) {
  949. if (match(TK("class"))) {
  950. compile_class();
  951. } else {
  952. compile_stmt();
  953. }
  954. match_newlines();
  955. }
  956. pop_context();
  957. return code;
  958. }
  959. };
  960. } // namespace pkpy