compiler.cpp 54 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422
  1. #include "pocketpy/compiler.h"
  2. namespace pkpy{
  // Storage for the static Pratt rule table, indexed by token type.
  // Entries are filled in by Compiler::init_pratt_rules().
  3. PrattRule Compiler::rules[kTokenCount];
  4. NameScope Compiler::name_scope() const {
  5. auto s = contexts.size()>1 ? NAME_LOCAL : NAME_GLOBAL;
  6. if(unknown_global_scope && s == NAME_GLOBAL) s = NAME_GLOBAL_UNKNOWN;
  7. return s;
  8. }
  9. CodeObject_ Compiler::push_global_context(){
  10. CodeObject_ co = std::make_shared<CodeObject>(lexer.src, lexer.src->filename);
  11. co->start_line = i==0 ? 1 : prev().line;
  12. contexts.push(CodeEmitContext(vm, co, contexts.size()));
  13. return co;
  14. }
  15. FuncDecl_ Compiler::push_f_context(Str name){
  16. FuncDecl_ decl = std::make_shared<FuncDecl>();
  17. decl->code = std::make_shared<CodeObject>(lexer.src, name);
  18. decl->code->start_line = i==0 ? 1 : prev().line;
  19. decl->nested = name_scope() == NAME_LOCAL;
  20. contexts.push(CodeEmitContext(vm, decl->code, contexts.size()));
  21. contexts.top().func = decl;
  22. return decl;
  23. }
  24. void Compiler::pop_context(){
  25. if(!ctx()->s_expr.empty()){
  26. throw std::runtime_error("!ctx()->s_expr.empty()");
  27. }
  28. // add a `return None` in the end as a guard
  29. // previously, we only do this if the last opcode is not a return
  30. // however, this is buggy...since there may be a jump to the end (out of bound) even if the last opcode is a return
  31. ctx()->emit_(OP_RETURN_VALUE, 1, BC_KEEPLINE, true);
  32. // find the last valid token
  33. int j = i-1;
  34. while(tokens[j].type == TK("@eol") || tokens[j].type == TK("@dedent") || tokens[j].type == TK("@eof")) j--;
  35. ctx()->co->end_line = tokens[j].line;
  36. // some check here
  37. auto& codes = ctx()->co->codes;
  38. if(ctx()->co->varnames.size() > PK_MAX_CO_VARNAMES){
  39. SyntaxError("maximum number of local variables exceeded");
  40. }
  41. if(ctx()->co->consts.size() > 65530){
  42. SyntaxError("maximum number of constants exceeded");
  43. }
  44. if(codes.size() > 65530 && ctx()->co->src->mode != JSON_MODE){
  45. // json mode does not contain jump instructions, so it is safe to ignore this check
  46. SyntaxError("maximum number of opcodes exceeded");
  47. }
  48. // pre-compute LOOP_BREAK and LOOP_CONTINUE
  49. for(int i=0; i<codes.size(); i++){
  50. Bytecode& bc = codes[i];
  51. if(bc.op == OP_LOOP_CONTINUE){
  52. bc.arg = ctx()->co->blocks[bc.arg].start;
  53. }else if(bc.op == OP_LOOP_BREAK){
  54. bc.arg = ctx()->co->blocks[bc.arg].get_break_end();
  55. }
  56. }
  57. // pre-compute func->is_simple
  58. FuncDecl_ func = contexts.top().func;
  59. if(func){
  60. // check generator
  61. for(Bytecode bc: func->code->codes){
  62. if(bc.op == OP_YIELD_VALUE || bc.op == OP_FOR_ITER_YIELD_VALUE){
  63. func->type = FuncType::GENERATOR;
  64. for(Bytecode bc: func->code->codes){
  65. if(bc.op == OP_RETURN_VALUE && bc.arg == BC_NOARG){
  66. SyntaxError("'return' with argument inside generator function");
  67. }
  68. }
  69. break;
  70. }
  71. }
  72. if(func->type == FuncType::UNSET){
  73. bool is_simple = true;
  74. if(func->kwargs.size() > 0) is_simple = false;
  75. if(func->starred_arg >= 0) is_simple = false;
  76. if(func->starred_kwarg >= 0) is_simple = false;
  77. if(is_simple){
  78. func->type = FuncType::SIMPLE;
  79. bool is_empty = false;
  80. if(func->code->codes.size() == 1){
  81. Bytecode bc = func->code->codes[0];
  82. if(bc.op == OP_RETURN_VALUE && bc.arg == 1){
  83. is_empty = true;
  84. }
  85. }
  86. if(is_empty) func->type = FuncType::EMPTY;
  87. }
  88. else func->type = FuncType::NORMAL;
  89. }
  90. PK_ASSERT(func->type != FuncType::UNSET);
  91. }
  92. contexts.pop();
  93. }
  94. void Compiler::init_pratt_rules(){
  95. PK_LOCAL_STATIC bool initialized = false;
  96. if(initialized) return;
  97. initialized = true;
  98. // http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
  99. #define PK_METHOD(name) &Compiler::name
  100. #define PK_NO_INFIX nullptr, PREC_LOWEST
  101. for(TokenIndex i=0; i<kTokenCount; i++) rules[i] = { nullptr, PK_NO_INFIX };
  102. rules[TK(".")] = { nullptr, PK_METHOD(exprAttrib), PREC_PRIMARY };
  103. rules[TK("(")] = { PK_METHOD(exprGroup), PK_METHOD(exprCall), PREC_PRIMARY };
  104. rules[TK("[")] = { PK_METHOD(exprList), PK_METHOD(exprSubscr), PREC_PRIMARY };
  105. rules[TK("{")] = { PK_METHOD(exprMap), PK_NO_INFIX };
  106. rules[TK("%")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_FACTOR };
  107. rules[TK("+")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_TERM };
  108. rules[TK("-")] = { PK_METHOD(exprUnaryOp), PK_METHOD(exprBinaryOp), PREC_TERM };
  109. rules[TK("*")] = { PK_METHOD(exprUnaryOp), PK_METHOD(exprBinaryOp), PREC_FACTOR };
  110. rules[TK("~")] = { PK_METHOD(exprUnaryOp), nullptr, PREC_UNARY };
  111. rules[TK("/")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_FACTOR };
  112. rules[TK("//")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_FACTOR };
  113. rules[TK("**")] = { PK_METHOD(exprUnaryOp), PK_METHOD(exprBinaryOp), PREC_EXPONENT };
  114. rules[TK(">")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
  115. rules[TK("<")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
  116. rules[TK("==")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
  117. rules[TK("!=")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
  118. rules[TK(">=")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
  119. rules[TK("<=")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
  120. rules[TK("in")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
  121. rules[TK("is")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
  122. rules[TK("<<")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  123. rules[TK(">>")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  124. rules[TK("&")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_AND };
  125. rules[TK("|")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_OR };
  126. rules[TK("^")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_XOR };
  127. rules[TK("@")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_FACTOR };
  128. rules[TK("if")] = { nullptr, PK_METHOD(exprTernary), PREC_TERNARY };
  129. rules[TK("not in")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
  130. rules[TK("is not")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
  131. rules[TK("and") ] = { nullptr, PK_METHOD(exprAnd), PREC_LOGICAL_AND };
  132. rules[TK("or")] = { nullptr, PK_METHOD(exprOr), PREC_LOGICAL_OR };
  133. rules[TK("not")] = { PK_METHOD(exprNot), nullptr, PREC_LOGICAL_NOT };
  134. rules[TK("True")] = { PK_METHOD(exprLiteral0), PK_NO_INFIX };
  135. rules[TK("False")] = { PK_METHOD(exprLiteral0), PK_NO_INFIX };
  136. rules[TK("None")] = { PK_METHOD(exprLiteral0), PK_NO_INFIX };
  137. rules[TK("...")] = { PK_METHOD(exprLiteral0), PK_NO_INFIX };
  138. rules[TK("lambda")] = { PK_METHOD(exprLambda), PK_NO_INFIX };
  139. rules[TK("@id")] = { PK_METHOD(exprName), PK_NO_INFIX };
  140. rules[TK("@num")] = { PK_METHOD(exprLiteral), PK_NO_INFIX };
  141. rules[TK("@str")] = { PK_METHOD(exprLiteral), PK_NO_INFIX };
  142. rules[TK("@fstr")] = { PK_METHOD(exprFString), PK_NO_INFIX };
  143. rules[TK("@long")] = { PK_METHOD(exprLong), PK_NO_INFIX };
  144. rules[TK("@imag")] = { PK_METHOD(exprImag), PK_NO_INFIX };
  145. rules[TK("@bytes")] = { PK_METHOD(exprBytes), PK_NO_INFIX };
  146. rules[TK(":")] = { PK_METHOD(exprSlice0), PK_METHOD(exprSlice1), PREC_PRIMARY };
  147. #undef PK_METHOD
  148. #undef PK_NO_INFIX
  149. }
  150. bool Compiler::match(TokenIndex expected) {
  151. if (curr().type != expected) return false;
  152. advance();
  153. return true;
  154. }
  155. void Compiler::consume(TokenIndex expected) {
  156. if (!match(expected)){
  157. SyntaxError(
  158. _S("expected '", TK_STR(expected), "', got '", TK_STR(curr().type), "'")
  159. );
  160. }
  161. }
  162. bool Compiler::match_newlines_repl(){
  163. return match_newlines(mode()==REPL_MODE);
  164. }
  165. bool Compiler::match_newlines(bool repl_throw) {
  166. bool consumed = false;
  167. if (curr().type == TK("@eol")) {
  168. while (curr().type == TK("@eol")) advance();
  169. consumed = true;
  170. }
  171. if (repl_throw && curr().type == TK("@eof")){
  172. throw NeedMoreLines(ctx()->is_compiling_class);
  173. }
  174. return consumed;
  175. }
  176. bool Compiler::match_end_stmt() {
  177. if (match(TK(";"))) { match_newlines(); return true; }
  178. if (match_newlines() || curr().type == TK("@eof")) return true;
  179. if (curr().type == TK("@dedent")) return true;
  180. return false;
  181. }
  182. void Compiler::consume_end_stmt() {
  183. if (!match_end_stmt()) SyntaxError("expected statement end");
  184. }
  185. void Compiler::EXPR() {
  186. parse_expression(PREC_LOWEST+1);
  187. }
  188. void Compiler::EXPR_TUPLE(bool allow_slice) {
  189. parse_expression(PREC_LOWEST+1, allow_slice);
  190. if(!match(TK(","))) return;
  191. // tuple expression
  192. Expr_vector items;
  193. items.push_back(ctx()->s_expr.popx());
  194. do {
  195. if(curr().brackets_level) match_newlines_repl();
  196. if(!is_expression(allow_slice)) break;
  197. parse_expression(PREC_LOWEST+1, allow_slice);
  198. items.push_back(ctx()->s_expr.popx());
  199. if(curr().brackets_level) match_newlines_repl();
  200. } while(match(TK(",")));
  201. ctx()->s_expr.push(make_expr<TupleExpr>(std::move(items)));
  202. }
  203. // special case for `for loop` and `comp`
  204. Expr_ Compiler::EXPR_VARS(){
  205. Expr_vector items;
  206. do {
  207. consume(TK("@id"));
  208. items.push_back(make_expr<NameExpr>(prev().str(), name_scope()));
  209. } while(match(TK(",")));
  210. if(items.size()==1) return std::move(items[0]);
  211. return make_expr<TupleExpr>(std::move(items));
  212. }
  213. void Compiler::exprLiteral(){
  214. ctx()->s_expr.push(make_expr<LiteralExpr>(prev().value));
  215. }
  216. void Compiler::exprLong(){
  217. ctx()->s_expr.push(make_expr<LongExpr>(prev().str()));
  218. }
  219. void Compiler::exprImag(){
  220. ctx()->s_expr.push(make_expr<ImagExpr>(std::get<f64>(prev().value)));
  221. }
  222. void Compiler::exprBytes(){
  223. ctx()->s_expr.push(make_expr<BytesExpr>(std::get<Str>(prev().value)));
  224. }
  225. void Compiler::exprFString(){
  226. ctx()->s_expr.push(make_expr<FStringExpr>(std::get<Str>(prev().value)));
  227. }
  228. void Compiler::exprLambda(){
  229. FuncDecl_ decl = push_f_context("<lambda>");
  230. auto e = make_expr<LambdaExpr>(decl);
  231. if(!match(TK(":"))){
  232. _compile_f_args(e->decl, false);
  233. consume(TK(":"));
  234. }
  235. // https://github.com/pocketpy/pocketpy/issues/37
  236. parse_expression(PREC_LAMBDA + 1);
  237. ctx()->emit_expr();
  238. ctx()->emit_(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  239. pop_context();
  240. ctx()->s_expr.push(std::move(e));
  241. }
  242. void Compiler::exprOr(){
  243. auto e = make_expr<OrExpr>();
  244. e->lhs = ctx()->s_expr.popx();
  245. parse_expression(PREC_LOGICAL_OR + 1);
  246. e->rhs = ctx()->s_expr.popx();
  247. ctx()->s_expr.push(std::move(e));
  248. }
  249. void Compiler::exprAnd(){
  250. auto e = make_expr<AndExpr>();
  251. e->lhs = ctx()->s_expr.popx();
  252. parse_expression(PREC_LOGICAL_AND + 1);
  253. e->rhs = ctx()->s_expr.popx();
  254. ctx()->s_expr.push(std::move(e));
  255. }
  256. void Compiler::exprTernary(){
  257. auto e = make_expr<TernaryExpr>();
  258. e->true_expr = ctx()->s_expr.popx();
  259. // cond
  260. parse_expression(PREC_TERNARY + 1);
  261. e->cond = ctx()->s_expr.popx();
  262. consume(TK("else"));
  263. // if false
  264. parse_expression(PREC_TERNARY + 1);
  265. e->false_expr = ctx()->s_expr.popx();
  266. ctx()->s_expr.push(std::move(e));
  267. }
  268. void Compiler::exprBinaryOp(){
  269. auto e = make_expr<BinaryExpr>();
  270. e->op = prev().type;
  271. e->lhs = ctx()->s_expr.popx();
  272. parse_expression(rules[e->op].precedence + 1);
  273. e->rhs = ctx()->s_expr.popx();
  274. ctx()->s_expr.push(std::move(e));
  275. }
  276. void Compiler::exprNot() {
  277. parse_expression(PREC_LOGICAL_NOT + 1);
  278. ctx()->s_expr.push(make_expr<NotExpr>(ctx()->s_expr.popx()));
  279. }
  280. void Compiler::exprUnaryOp(){
  281. TokenIndex op = prev().type;
  282. parse_expression(PREC_UNARY + 1);
  283. switch(op){
  284. case TK("-"):
  285. ctx()->s_expr.push(make_expr<NegatedExpr>(ctx()->s_expr.popx()));
  286. break;
  287. case TK("~"):
  288. ctx()->s_expr.push(make_expr<InvertExpr>(ctx()->s_expr.popx()));
  289. break;
  290. case TK("*"):
  291. ctx()->s_expr.push(make_expr<StarredExpr>(1, ctx()->s_expr.popx()));
  292. break;
  293. case TK("**"):
  294. ctx()->s_expr.push(make_expr<StarredExpr>(2, ctx()->s_expr.popx()));
  295. break;
  296. default: PK_FATAL_ERROR();
  297. }
  298. }
  299. void Compiler::exprGroup(){
  300. match_newlines_repl();
  301. EXPR_TUPLE(); // () is just for change precedence
  302. match_newlines_repl();
  303. consume(TK(")"));
  304. if(ctx()->s_expr.top()->is_tuple()) return;
  305. Expr_ g = make_expr<GroupedExpr>(ctx()->s_expr.popx());
  306. ctx()->s_expr.push(std::move(g));
  307. }
  308. void Compiler::consume_comp(unique_ptr_128<CompExpr> ce, Expr_ expr){
  309. ce->expr = std::move(expr);
  310. ce->vars = EXPR_VARS();
  311. consume(TK("in"));
  312. parse_expression(PREC_TERNARY + 1);
  313. ce->iter = ctx()->s_expr.popx();
  314. match_newlines_repl();
  315. if(match(TK("if"))){
  316. parse_expression(PREC_TERNARY + 1);
  317. ce->cond = ctx()->s_expr.popx();
  318. }
  319. ctx()->s_expr.push(std::move(ce));
  320. match_newlines_repl();
  321. }
  322. void Compiler::exprList() {
  323. int line = prev().line;
  324. Expr_vector items;
  325. do {
  326. match_newlines_repl();
  327. if (curr().type == TK("]")) break;
  328. EXPR();
  329. items.push_back(ctx()->s_expr.popx());
  330. match_newlines_repl();
  331. if(items.size()==1 && match(TK("for"))){
  332. consume_comp(make_expr<ListCompExpr>(), std::move(items[0]));
  333. consume(TK("]"));
  334. return;
  335. }
  336. match_newlines_repl();
  337. } while (match(TK(",")));
  338. consume(TK("]"));
  339. auto e = make_expr<ListExpr>(std::move(items));
  340. e->line = line; // override line
  341. ctx()->s_expr.push(std::move(e));
  342. }
  343. void Compiler::exprMap() {
  344. bool parsing_dict = false; // {...} may be dict or set
  345. Expr_vector items;
  346. do {
  347. match_newlines_repl();
  348. if (curr().type == TK("}")) break;
  349. EXPR();
  350. int star_level = ctx()->s_expr.top()->star_level();
  351. if(star_level==2 || curr().type == TK(":")){
  352. parsing_dict = true;
  353. }
  354. if(parsing_dict){
  355. auto dict_item = make_expr<DictItemExpr>();
  356. if(star_level == 2){
  357. dict_item->key = nullptr;
  358. dict_item->value = ctx()->s_expr.popx();
  359. }else{
  360. consume(TK(":"));
  361. EXPR();
  362. dict_item->key = ctx()->s_expr.popx();
  363. dict_item->value = ctx()->s_expr.popx();
  364. }
  365. items.push_back(std::move(dict_item));
  366. }else{
  367. items.push_back(ctx()->s_expr.popx());
  368. }
  369. match_newlines_repl();
  370. if(items.size()==1 && match(TK("for"))){
  371. if(parsing_dict) consume_comp(make_expr<DictCompExpr>(), std::move(items[0]));
  372. else consume_comp(make_expr<SetCompExpr>(), std::move(items[0]));
  373. consume(TK("}"));
  374. return;
  375. }
  376. match_newlines_repl();
  377. } while (match(TK(",")));
  378. consume(TK("}"));
  379. if(items.size()==0 || parsing_dict){
  380. auto e = make_expr<DictExpr>(std::move(items));
  381. ctx()->s_expr.push(std::move(e));
  382. }else{
  383. auto e = make_expr<SetExpr>(std::move(items));
  384. ctx()->s_expr.push(std::move(e));
  385. }
  386. }
  387. void Compiler::exprCall() {
  388. auto e = make_expr<CallExpr>();
  389. e->callable = ctx()->s_expr.popx();
  390. do {
  391. match_newlines_repl();
  392. if (curr().type==TK(")")) break;
  393. if(curr().type==TK("@id") && next().type==TK("=")) {
  394. consume(TK("@id"));
  395. Str key = prev().str();
  396. consume(TK("="));
  397. EXPR();
  398. e->kwargs.push_back({key, ctx()->s_expr.popx()});
  399. } else{
  400. EXPR();
  401. if(ctx()->s_expr.top()->star_level() == 2){
  402. // **kwargs
  403. e->kwargs.push_back({"**", ctx()->s_expr.popx()});
  404. }else{
  405. // positional argument
  406. if(!e->kwargs.empty()) SyntaxError("positional argument follows keyword argument");
  407. e->args.push_back(ctx()->s_expr.popx());
  408. }
  409. }
  410. match_newlines_repl();
  411. } while (match(TK(",")));
  412. consume(TK(")"));
  413. if(e->args.size() > 32767) SyntaxError("too many positional arguments");
  414. if(e->kwargs.size() > 32767) SyntaxError("too many keyword arguments");
  415. ctx()->s_expr.push(std::move(e));
  416. }
  417. void Compiler::exprName(){
  418. Str name = prev().str();
  419. NameScope scope = name_scope();
  420. if(ctx()->global_names.count(name)){
  421. scope = NAME_GLOBAL;
  422. }
  423. ctx()->s_expr.push(make_expr<NameExpr>(name, scope));
  424. }
  425. void Compiler::exprAttrib() {
  426. consume(TK("@id"));
  427. ctx()->s_expr.push(
  428. make_expr<AttribExpr>(ctx()->s_expr.popx(), StrName::get(prev().sv()))
  429. );
  430. }
  431. void Compiler::exprSlice0() {
  432. auto slice = make_expr<SliceExpr>();
  433. if(is_expression()){ // :<stop>
  434. EXPR();
  435. slice->stop = ctx()->s_expr.popx();
  436. // try optional step
  437. if(match(TK(":"))){ // :<stop>:<step>
  438. EXPR();
  439. slice->step = ctx()->s_expr.popx();
  440. }
  441. }else if(match(TK(":"))){
  442. if(is_expression()){ // ::<step>
  443. EXPR();
  444. slice->step = ctx()->s_expr.popx();
  445. } // else ::
  446. } // else :
  447. ctx()->s_expr.push(std::move(slice));
  448. }
  449. void Compiler::exprSlice1() {
  450. auto slice = make_expr<SliceExpr>();
  451. slice->start = ctx()->s_expr.popx();
  452. if(is_expression()){ // <start>:<stop>
  453. EXPR();
  454. slice->stop = ctx()->s_expr.popx();
  455. // try optional step
  456. if(match(TK(":"))){ // <start>:<stop>:<step>
  457. EXPR();
  458. slice->step = ctx()->s_expr.popx();
  459. }
  460. }else if(match(TK(":"))){ // <start>::<step>
  461. EXPR();
  462. slice->step = ctx()->s_expr.popx();
  463. } // else <start>:
  464. ctx()->s_expr.push(std::move(slice));
  465. }
  466. void Compiler::exprSubscr() {
  467. auto e = make_expr<SubscrExpr>();
  468. match_newlines_repl();
  469. e->a = ctx()->s_expr.popx(); // a
  470. EXPR_TUPLE(true);
  471. e->b = ctx()->s_expr.popx(); // a[<expr>]
  472. match_newlines_repl();
  473. consume(TK("]"));
  474. ctx()->s_expr.push(std::move(e));
  475. }
  476. void Compiler::exprLiteral0() {
  477. ctx()->s_expr.push(make_expr<Literal0Expr>(prev().type));
  478. }
  479. void Compiler::compile_block_body(void (Compiler::*callback)()) {
  480. if(callback == nullptr) callback = &Compiler::compile_stmt;
  481. consume(TK(":"));
  482. if(curr().type!=TK("@eol") && curr().type!=TK("@eof")){
  483. compile_stmt(); // inline block
  484. return;
  485. }
  486. if(!match_newlines(mode()==REPL_MODE)){
  487. SyntaxError("expected a new line after ':'");
  488. }
  489. consume(TK("@indent"));
  490. while (curr().type != TK("@dedent")) {
  491. match_newlines();
  492. (this->*callback)();
  493. match_newlines();
  494. }
  495. consume(TK("@dedent"));
  496. }
  497. // import a [as b]
  498. // import a [as b], c [as d]
  499. void Compiler::compile_normal_import() {
  500. do {
  501. consume(TK("@id"));
  502. Str name = prev().str();
  503. ctx()->emit_(OP_IMPORT_PATH, ctx()->add_const_string(name.sv()), prev().line);
  504. if (match(TK("as"))) {
  505. consume(TK("@id"));
  506. name = prev().str();
  507. }
  508. ctx()->emit_store_name(name_scope(), StrName(name), prev().line);
  509. } while (match(TK(",")));
  510. consume_end_stmt();
  511. }
  512. // from a import b [as c], d [as e]
  513. // from a.b import c [as d]
  514. // from . import a [as b]
  515. // from .a import b [as c]
  516. // from ..a import b [as c]
  517. // from .a.b import c [as d]
  518. // from xxx import *
  519. void Compiler::compile_from_import() {
  520. int dots = 0;
  521. while(true){
  522. switch(curr().type){
  523. case TK("."): dots+=1; break;
  524. case TK(".."): dots+=2; break;
  525. case TK("..."): dots+=3; break;
  526. default: goto __EAT_DOTS_END;
  527. }
  528. advance();
  529. }
  530. __EAT_DOTS_END:
  531. SStream ss;
  532. for(int i=0; i<dots; i++) ss << '.';
  533. if(dots > 0){
  534. // @id is optional if dots > 0
  535. if(match(TK("@id"))){
  536. ss << prev().sv();
  537. while (match(TK("."))) {
  538. consume(TK("@id"));
  539. ss << "." << prev().sv();
  540. }
  541. }
  542. }else{
  543. // @id is required if dots == 0
  544. consume(TK("@id"));
  545. ss << prev().sv();
  546. while (match(TK("."))) {
  547. consume(TK("@id"));
  548. ss << "." << prev().sv();
  549. }
  550. }
  551. ctx()->emit_(OP_IMPORT_PATH, ctx()->add_const_string(ss.str().sv()), prev().line);
  552. consume(TK("import"));
  553. if (match(TK("*"))) {
  554. if(name_scope() != NAME_GLOBAL) SyntaxError("from <module> import * can only be used in global scope");
  555. // pop the module and import __all__
  556. ctx()->emit_(OP_POP_IMPORT_STAR, BC_NOARG, prev().line);
  557. consume_end_stmt();
  558. return;
  559. }
  560. do {
  561. ctx()->emit_(OP_DUP_TOP, BC_NOARG, BC_KEEPLINE);
  562. consume(TK("@id"));
  563. Str name = prev().str();
  564. ctx()->emit_(OP_LOAD_ATTR, StrName(name).index, prev().line);
  565. if (match(TK("as"))) {
  566. consume(TK("@id"));
  567. name = prev().str();
  568. }
  569. ctx()->emit_store_name(name_scope(), StrName(name), prev().line);
  570. } while (match(TK(",")));
  571. ctx()->emit_(OP_POP_TOP, BC_NOARG, BC_KEEPLINE);
  572. consume_end_stmt();
  573. }
  574. bool Compiler::is_expression(bool allow_slice){
  575. PrattCallback prefix = rules[curr().type].prefix;
  576. return prefix != nullptr && (allow_slice || curr().type!=TK(":"));
  577. }
  578. void Compiler::parse_expression(int precedence, bool allow_slice) {
  579. PrattCallback prefix = rules[curr().type].prefix;
  580. if (prefix==nullptr || (curr().type==TK(":") && !allow_slice)){
  581. SyntaxError(Str("expected an expression, got ") + TK_STR(curr().type));
  582. }
  583. advance();
  584. (this->*prefix)();
  585. while (rules[curr().type].precedence >= precedence && (allow_slice || curr().type!=TK(":"))) {
  586. TokenIndex op = curr().type;
  587. advance();
  588. PrattCallback infix = rules[op].infix;
  589. PK_ASSERT(infix != nullptr);
  590. (this->*infix)();
  591. }
  592. }
  593. void Compiler::compile_if_stmt() {
  594. EXPR(); // condition
  595. ctx()->emit_expr();
  596. int patch = ctx()->emit_(OP_POP_JUMP_IF_FALSE, BC_NOARG, prev().line);
  597. compile_block_body();
  598. if (match(TK("elif"))) {
  599. int exit_patch = ctx()->emit_(OP_JUMP_ABSOLUTE, BC_NOARG, prev().line);
  600. ctx()->patch_jump(patch);
  601. compile_if_stmt();
  602. ctx()->patch_jump(exit_patch);
  603. } else if (match(TK("else"))) {
  604. int exit_patch = ctx()->emit_(OP_JUMP_ABSOLUTE, BC_NOARG, prev().line);
  605. ctx()->patch_jump(patch);
  606. compile_block_body();
  607. ctx()->patch_jump(exit_patch);
  608. } else {
  609. ctx()->patch_jump(patch);
  610. }
  611. }
  612. void Compiler::compile_while_loop() {
  613. CodeBlock* block = ctx()->enter_block(CodeBlockType::WHILE_LOOP);
  614. EXPR(); // condition
  615. ctx()->emit_expr();
  616. int patch = ctx()->emit_(OP_POP_JUMP_IF_FALSE, BC_NOARG, prev().line);
  617. compile_block_body();
  618. ctx()->emit_(OP_LOOP_CONTINUE, ctx()->get_loop(), BC_KEEPLINE, true);
  619. ctx()->patch_jump(patch);
  620. ctx()->exit_block();
  621. // optional else clause
  622. if (match(TK("else"))) {
  623. compile_block_body();
  624. block->end2 = ctx()->co->codes.size();
  625. }
  626. }
  627. void Compiler::compile_for_loop() {
  628. Expr_ vars = EXPR_VARS();
  629. consume(TK("in"));
  630. EXPR_TUPLE(); ctx()->emit_expr();
  631. ctx()->emit_(OP_GET_ITER, BC_NOARG, BC_KEEPLINE);
  632. CodeBlock* block = ctx()->enter_block(CodeBlockType::FOR_LOOP);
  633. int for_codei = ctx()->emit_(OP_FOR_ITER, BC_NOARG, BC_KEEPLINE);
  634. bool ok = vars->emit_store(ctx());
  635. if(!ok) SyntaxError(); // this error occurs in `vars` instead of this line, but...nevermind
  636. ctx()->try_merge_for_iter_store(for_codei);
  637. compile_block_body();
  638. ctx()->emit_(OP_LOOP_CONTINUE, ctx()->get_loop(), BC_KEEPLINE, true);
  639. ctx()->exit_block();
  640. // optional else clause
  641. if (match(TK("else"))) {
  642. compile_block_body();
  643. block->end2 = ctx()->co->codes.size();
  644. }
  645. }
  646. void Compiler::compile_try_except() {
  647. ctx()->enter_block(CodeBlockType::TRY_EXCEPT);
  648. compile_block_body();
  649. pod_vector<int> patches = {
  650. ctx()->emit_(OP_JUMP_ABSOLUTE, BC_NOARG, BC_KEEPLINE)
  651. };
  652. ctx()->exit_block();
  653. int finally_entry = -1;
  654. if(curr().type != TK("finally")){
  655. do {
  656. StrName as_name;
  657. consume(TK("except"));
  658. if(is_expression()){
  659. EXPR(); // push assumed type on to the stack
  660. ctx()->emit_expr();
  661. ctx()->emit_(OP_EXCEPTION_MATCH, BC_NOARG, prev().line);
  662. if(match(TK("as"))){
  663. consume(TK("@id"));
  664. as_name = StrName(prev().sv());
  665. }
  666. }else{
  667. ctx()->emit_(OP_LOAD_TRUE, BC_NOARG, BC_KEEPLINE);
  668. }
  669. int patch = ctx()->emit_(OP_POP_JUMP_IF_FALSE, BC_NOARG, BC_KEEPLINE);
  670. // on match
  671. if(!as_name.empty()){
  672. ctx()->emit_(OP_DUP_TOP, BC_NOARG, BC_KEEPLINE);
  673. ctx()->emit_store_name(name_scope(), as_name, BC_KEEPLINE);
  674. }
  675. // pop the exception
  676. ctx()->emit_(OP_POP_EXCEPTION, BC_NOARG, BC_KEEPLINE);
  677. compile_block_body();
  678. patches.push_back(ctx()->emit_(OP_JUMP_ABSOLUTE, BC_NOARG, BC_KEEPLINE));
  679. ctx()->patch_jump(patch);
  680. }while(curr().type == TK("except"));
  681. }
  682. if(match(TK("finally"))){
  683. int patch = ctx()->emit_(OP_JUMP_ABSOLUTE, BC_NOARG, BC_KEEPLINE);
  684. finally_entry = ctx()->co->codes.size();
  685. compile_block_body();
  686. ctx()->emit_(OP_JUMP_ABSOLUTE_TOP, BC_NOARG, BC_KEEPLINE);
  687. ctx()->patch_jump(patch);
  688. }
  689. // no match, re-raise
  690. if(finally_entry != -1){
  691. i64 target = ctx()->co->codes.size()+2;
  692. ctx()->emit_(OP_LOAD_CONST, ctx()->add_const(VAR(target)), BC_KEEPLINE);
  693. ctx()->emit_(OP_JUMP_ABSOLUTE, finally_entry, BC_KEEPLINE);
  694. }
  695. ctx()->emit_(OP_RE_RAISE, BC_NOARG, BC_KEEPLINE);
  696. // no exception or no match, jump to the end
  697. for (int patch : patches) ctx()->patch_jump(patch);
  698. if(finally_entry != -1){
  699. i64 target = ctx()->co->codes.size()+2;
  700. ctx()->emit_(OP_LOAD_CONST, ctx()->add_const(VAR(target)), BC_KEEPLINE);
  701. ctx()->emit_(OP_JUMP_ABSOLUTE, finally_entry, BC_KEEPLINE);
  702. }
  703. }
  704. void Compiler::compile_decorated(){
  705. Expr_vector decorators;
  706. do{
  707. EXPR();
  708. decorators.push_back(ctx()->s_expr.popx());
  709. if(!match_newlines_repl()) SyntaxError();
  710. }while(match(TK("@")));
  711. if(match(TK("class"))){
  712. compile_class(decorators);
  713. }else{
  714. consume(TK("def"));
  715. compile_function(decorators);
  716. }
  717. }
  718. bool Compiler::try_compile_assignment(){
  719. switch (curr().type) {
  720. case TK("+="): case TK("-="): case TK("*="): case TK("/="): case TK("//="): case TK("%="):
  721. case TK("<<="): case TK(">>="): case TK("&="): case TK("|="): case TK("^="): {
  722. Expr* lhs_p = ctx()->s_expr.top().get();
  723. if(lhs_p->is_starred()) SyntaxError();
  724. if(ctx()->is_compiling_class) SyntaxError("can't use inplace operator in class definition");
  725. advance();
  726. auto e = make_expr<BinaryExpr>();
  727. e->op = prev().type - 1; // -1 to remove =
  728. e->lhs = ctx()->s_expr.popx();
  729. EXPR_TUPLE();
  730. e->rhs = ctx()->s_expr.popx();
  731. if(e->is_starred()) SyntaxError();
  732. e->emit_(ctx());
  733. bool ok = lhs_p->emit_store(ctx());
  734. if(!ok) SyntaxError();
  735. } return true;
  736. case TK("="): {
  737. int n = 0;
  738. while(match(TK("="))){
  739. EXPR_TUPLE();
  740. n += 1;
  741. }
  742. // stack size is n+1
  743. Expr_ val = ctx()->s_expr.popx();
  744. val->emit_(ctx());
  745. for(int j=1; j<n; j++) ctx()->emit_(OP_DUP_TOP, BC_NOARG, BC_KEEPLINE);
  746. for(int j=0; j<n; j++){
  747. auto e = ctx()->s_expr.popx();
  748. if(e->is_starred()) SyntaxError();
  749. bool ok = e->emit_store(ctx());
  750. if(!ok) SyntaxError();
  751. }
  752. } return true;
  753. default: return false;
  754. }
  755. }
  756. void Compiler::compile_stmt() {
  757. if(match(TK("class"))){
  758. compile_class();
  759. return;
  760. }
  761. advance();
  762. int kw_line = prev().line; // backup line number
  763. int curr_loop_block = ctx()->get_loop();
  764. switch(prev().type){
  765. case TK("break"):
  766. if (curr_loop_block < 0) SyntaxError("'break' outside loop");
  767. ctx()->emit_(OP_LOOP_BREAK, curr_loop_block, kw_line);
  768. consume_end_stmt();
  769. break;
  770. case TK("continue"):
  771. if (curr_loop_block < 0) SyntaxError("'continue' not properly in loop");
  772. ctx()->emit_(OP_LOOP_CONTINUE, curr_loop_block, kw_line);
  773. consume_end_stmt();
  774. break;
  775. case TK("yield"):
  776. if (contexts.size() <= 1) SyntaxError("'yield' outside function");
  777. EXPR_TUPLE(); ctx()->emit_expr();
  778. ctx()->emit_(OP_YIELD_VALUE, BC_NOARG, kw_line);
  779. consume_end_stmt();
  780. break;
  781. case TK("yield from"):
  782. if (contexts.size() <= 1) SyntaxError("'yield from' outside function");
  783. EXPR_TUPLE(); ctx()->emit_expr();
  784. ctx()->emit_(OP_GET_ITER, BC_NOARG, kw_line);
  785. ctx()->enter_block(CodeBlockType::FOR_LOOP);
  786. ctx()->emit_(OP_FOR_ITER_YIELD_VALUE, BC_NOARG, kw_line);
  787. ctx()->emit_(OP_LOOP_CONTINUE, ctx()->get_loop(), kw_line);
  788. ctx()->exit_block();
  789. consume_end_stmt();
  790. break;
  791. case TK("return"):
  792. if (contexts.size() <= 1) SyntaxError("'return' outside function");
  793. if(match_end_stmt()){
  794. ctx()->emit_(OP_RETURN_VALUE, 1, kw_line);
  795. }else{
  796. EXPR_TUPLE(); ctx()->emit_expr();
  797. consume_end_stmt();
  798. ctx()->emit_(OP_RETURN_VALUE, BC_NOARG, kw_line);
  799. }
  800. break;
  801. /*************************************************/
  802. case TK("if"): compile_if_stmt(); break;
  803. case TK("while"): compile_while_loop(); break;
  804. case TK("for"): compile_for_loop(); break;
  805. case TK("import"): compile_normal_import(); break;
  806. case TK("from"): compile_from_import(); break;
  807. case TK("def"): compile_function(); break;
  808. case TK("@"): compile_decorated(); break;
  809. case TK("try"): compile_try_except(); break;
  810. case TK("pass"): consume_end_stmt(); break;
  811. /*************************************************/
  812. case TK("++"):{
  813. consume(TK("@id"));
  814. StrName name(prev().sv());
  815. NameScope scope = name_scope();
  816. bool is_global = ctx()->global_names.count(name.sv());
  817. if(is_global) scope = NAME_GLOBAL;
  818. switch(scope){
  819. case NAME_LOCAL:
  820. ctx()->emit_(OP_INC_FAST, ctx()->add_varname(name), prev().line);
  821. break;
  822. case NAME_GLOBAL:
  823. ctx()->emit_(OP_INC_GLOBAL, name.index, prev().line);
  824. break;
  825. default: SyntaxError(); break;
  826. }
  827. consume_end_stmt();
  828. break;
  829. }
  830. case TK("--"):{
  831. consume(TK("@id"));
  832. StrName name(prev().sv());
  833. switch(name_scope()){
  834. case NAME_LOCAL:
  835. ctx()->emit_(OP_DEC_FAST, ctx()->add_varname(name), prev().line);
  836. break;
  837. case NAME_GLOBAL:
  838. ctx()->emit_(OP_DEC_GLOBAL, name.index, prev().line);
  839. break;
  840. default: SyntaxError(); break;
  841. }
  842. consume_end_stmt();
  843. break;
  844. }
  845. case TK("assert"):{
  846. EXPR(); // condition
  847. ctx()->emit_expr();
  848. int index = ctx()->emit_(OP_POP_JUMP_IF_TRUE, BC_NOARG, kw_line);
  849. int has_msg = 0;
  850. if(match(TK(","))){
  851. EXPR(); // message
  852. ctx()->emit_expr();
  853. has_msg = 1;
  854. }
  855. ctx()->emit_(OP_RAISE_ASSERT, has_msg, kw_line);
  856. ctx()->patch_jump(index);
  857. consume_end_stmt();
  858. break;
  859. }
  860. case TK("global"):
  861. do {
  862. consume(TK("@id"));
  863. ctx()->global_names.insert(prev().str());
  864. } while (match(TK(",")));
  865. consume_end_stmt();
  866. break;
  867. case TK("raise"): {
  868. EXPR(); ctx()->emit_expr();
  869. ctx()->emit_(OP_RAISE, BC_NOARG, kw_line);
  870. consume_end_stmt();
  871. } break;
  872. case TK("del"): {
  873. EXPR_TUPLE();
  874. Expr_ e = ctx()->s_expr.popx();
  875. bool ok = e->emit_del(ctx());
  876. if(!ok) SyntaxError();
  877. consume_end_stmt();
  878. } break;
  879. case TK("with"): {
  880. EXPR(); // [ <expr> ]
  881. ctx()->emit_expr();
  882. ctx()->enter_block(CodeBlockType::CONTEXT_MANAGER);
  883. Expr_ as_name;
  884. if(match(TK("as"))){
  885. consume(TK("@id"));
  886. as_name = make_expr<NameExpr>(prev().str(), name_scope());
  887. }
  888. ctx()->emit_(OP_WITH_ENTER, BC_NOARG, prev().line);
  889. // [ <expr> <expr>.__enter__() ]
  890. if(as_name != nullptr){
  891. bool ok = as_name->emit_store(ctx());
  892. if(!ok) SyntaxError();
  893. }else{
  894. ctx()->emit_(OP_POP_TOP, BC_NOARG, BC_KEEPLINE);
  895. }
  896. compile_block_body();
  897. ctx()->emit_(OP_WITH_EXIT, BC_NOARG, prev().line);
  898. ctx()->exit_block();
  899. } break;
  900. /*************************************************/
  901. case TK("=="): {
  902. consume(TK("@id"));
  903. if(mode()!=EXEC_MODE) SyntaxError("'label' is only available in EXEC_MODE");
  904. bool ok = ctx()->add_label(prev().str());
  905. consume(TK("=="));
  906. if(!ok) SyntaxError("label " + prev().str().escape() + " already exists");
  907. consume_end_stmt();
  908. } break;
  909. case TK("->"):
  910. consume(TK("@id"));
  911. if(mode()!=EXEC_MODE) SyntaxError("'goto' is only available in EXEC_MODE");
  912. ctx()->emit_(OP_GOTO, StrName(prev().sv()).index, prev().line);
  913. consume_end_stmt();
  914. break;
  915. /*************************************************/
  916. // handle dangling expression or assignment
  917. default: {
  918. advance(-1); // do revert since we have pre-called advance() at the beginning
  919. EXPR_TUPLE();
  920. bool is_typed_name = false; // e.g. x: int
  921. // eat variable's type hint if it is a single name
  922. if(ctx()->s_expr.top()->is_name()){
  923. if(match(TK(":"))){
  924. consume_type_hints();
  925. is_typed_name = true;
  926. if(ctx()->is_compiling_class){
  927. NameExpr* ne = static_cast<NameExpr*>(ctx()->s_expr.top().get());
  928. ctx()->emit_(OP_ADD_CLASS_ANNOTATION, ne->name.index, BC_KEEPLINE);
  929. }
  930. }
  931. }
  932. if(!try_compile_assignment()){
  933. if(!ctx()->s_expr.empty() && ctx()->s_expr.top()->is_starred()){
  934. SyntaxError();
  935. }
  936. if(!is_typed_name){
  937. ctx()->emit_expr();
  938. if((mode()==CELL_MODE || mode()==REPL_MODE) && name_scope()==NAME_GLOBAL){
  939. ctx()->emit_(OP_PRINT_EXPR, BC_NOARG, BC_KEEPLINE);
  940. }else{
  941. ctx()->emit_(OP_POP_TOP, BC_NOARG, BC_KEEPLINE);
  942. }
  943. }else{
  944. PK_ASSERT(ctx()->s_expr.size() == 1)
  945. ctx()->s_expr.pop();
  946. }
  947. }
  948. consume_end_stmt();
  949. }
  950. }
  951. }
  952. void Compiler::consume_type_hints(){
  953. EXPR();
  954. ctx()->s_expr.pop();
  955. }
  956. void Compiler::_add_decorators(const Expr_vector& decorators){
  957. // [obj]
  958. for(auto it=decorators.rbegin(); it!=decorators.rend(); ++it){
  959. (*it)->emit_(ctx()); // [obj, f]
  960. ctx()->emit_(OP_ROT_TWO, BC_NOARG, (*it)->line); // [f, obj]
  961. ctx()->emit_(OP_LOAD_NULL, BC_NOARG, BC_KEEPLINE); // [f, obj, NULL]
  962. ctx()->emit_(OP_ROT_TWO, BC_NOARG, BC_KEEPLINE); // [obj, NULL, f]
  963. ctx()->emit_(OP_CALL, 1, (*it)->line); // [obj]
  964. }
  965. }
  966. void Compiler::compile_class(const Expr_vector& decorators){
  967. consume(TK("@id"));
  968. int namei = StrName(prev().sv()).index;
  969. Expr_ base = nullptr;
  970. if(match(TK("("))){
  971. if(is_expression()){
  972. EXPR();
  973. base = ctx()->s_expr.popx();
  974. }
  975. consume(TK(")"));
  976. }
  977. if(base == nullptr){
  978. ctx()->emit_(OP_LOAD_NONE, BC_NOARG, prev().line);
  979. }else {
  980. base->emit_(ctx());
  981. }
  982. ctx()->emit_(OP_BEGIN_CLASS, namei, BC_KEEPLINE);
  983. for(auto& c: this->contexts.container()){
  984. if(c.is_compiling_class){
  985. SyntaxError("nested class is not allowed");
  986. }
  987. }
  988. ctx()->is_compiling_class = true;
  989. compile_block_body();
  990. ctx()->is_compiling_class = false;
  991. if(!decorators.empty()){
  992. ctx()->emit_(OP_BEGIN_CLASS_DECORATION, BC_NOARG, BC_KEEPLINE);
  993. _add_decorators(decorators);
  994. ctx()->emit_(OP_END_CLASS_DECORATION, BC_NOARG, BC_KEEPLINE);
  995. }
  996. ctx()->emit_(OP_END_CLASS, namei, BC_KEEPLINE);
  997. }
  998. void Compiler::_compile_f_args(FuncDecl_ decl, bool enable_type_hints){
  999. int state = 0; // 0 for args, 1 for *args, 2 for k=v, 3 for **kwargs
  1000. do {
  1001. if(state > 3) SyntaxError();
  1002. if(state == 3) SyntaxError("**kwargs should be the last argument");
  1003. match_newlines();
  1004. if(match(TK("*"))){
  1005. if(state < 1) state = 1;
  1006. else SyntaxError("*args should be placed before **kwargs");
  1007. }
  1008. else if(match(TK("**"))){
  1009. state = 3;
  1010. }
  1011. consume(TK("@id"));
  1012. StrName name = prev().str();
  1013. // check duplicate argument name
  1014. for(int j: decl->args){
  1015. if(decl->code->varnames[j] == name) {
  1016. SyntaxError("duplicate argument name");
  1017. }
  1018. }
  1019. for(auto& kv: decl->kwargs){
  1020. if(decl->code->varnames[kv.index] == name){
  1021. SyntaxError("duplicate argument name");
  1022. }
  1023. }
  1024. if(decl->starred_arg!=-1 && decl->code->varnames[decl->starred_arg] == name){
  1025. SyntaxError("duplicate argument name");
  1026. }
  1027. if(decl->starred_kwarg!=-1 && decl->code->varnames[decl->starred_kwarg] == name){
  1028. SyntaxError("duplicate argument name");
  1029. }
  1030. // eat type hints
  1031. if(enable_type_hints && match(TK(":"))) consume_type_hints();
  1032. if(state == 0 && curr().type == TK("=")) state = 2;
  1033. int index = ctx()->add_varname(name);
  1034. switch (state)
  1035. {
  1036. case 0:
  1037. decl->args.push_back(index);
  1038. break;
  1039. case 1:
  1040. decl->starred_arg = index;
  1041. state+=1;
  1042. break;
  1043. case 2: {
  1044. consume(TK("="));
  1045. PyObject* value = read_literal();
  1046. if(value == nullptr){
  1047. SyntaxError(Str("default argument must be a literal"));
  1048. }
  1049. decl->add_kwarg(index, name, value);
  1050. } break;
  1051. case 3:
  1052. decl->starred_kwarg = index;
  1053. state+=1;
  1054. break;
  1055. }
  1056. } while (match(TK(",")));
  1057. }
  1058. void Compiler::compile_function(const Expr_vector& decorators){
  1059. consume(TK("@id"));
  1060. Str decl_name = prev().str();
  1061. FuncDecl_ decl = push_f_context(decl_name);
  1062. consume(TK("("));
  1063. if (!match(TK(")"))) {
  1064. _compile_f_args(decl, true);
  1065. consume(TK(")"));
  1066. }
  1067. if(match(TK("->"))) consume_type_hints();
  1068. compile_block_body();
  1069. pop_context();
  1070. decl->docstring = nullptr;
  1071. if(decl->code->codes.size()>=2 && decl->code->codes[0].op == OP_LOAD_CONST && decl->code->codes[1].op == OP_POP_TOP){
  1072. PyObject* c = decl->code->consts[decl->code->codes[0].arg];
  1073. if(is_type(c, vm->tp_str)){
  1074. decl->code->codes[0].op = OP_NO_OP;
  1075. decl->code->codes[1].op = OP_NO_OP;
  1076. decl->docstring = PK_OBJ_GET(Str, c).c_str();
  1077. }
  1078. }
  1079. ctx()->emit_(OP_LOAD_FUNCTION, ctx()->add_func_decl(decl), prev().line);
  1080. _add_decorators(decorators);
  1081. if(!ctx()->is_compiling_class){
  1082. auto e = make_expr<NameExpr>(decl_name, name_scope());
  1083. e->emit_store(ctx());
  1084. }else{
  1085. int index = StrName(decl_name).index;
  1086. ctx()->emit_(OP_STORE_CLASS_ATTR, index, prev().line);
  1087. }
  1088. }
  1089. PyObject* Compiler::to_object(const TokenValue& value){
  1090. PyObject* obj = nullptr;
  1091. if(std::holds_alternative<i64>(value)){
  1092. obj = VAR(std::get<i64>(value));
  1093. }
  1094. if(std::holds_alternative<f64>(value)){
  1095. obj = VAR(std::get<f64>(value));
  1096. }
  1097. if(std::holds_alternative<Str>(value)){
  1098. obj = VAR(std::get<Str>(value));
  1099. }
  1100. PK_ASSERT(obj != nullptr)
  1101. return obj;
  1102. }
  1103. PyObject* Compiler::read_literal(){
  1104. advance();
  1105. switch(prev().type){
  1106. case TK("-"): {
  1107. consume(TK("@num"));
  1108. PyObject* val = to_object(prev().value);
  1109. return vm->py_negate(val);
  1110. }
  1111. case TK("@num"): return to_object(prev().value);
  1112. case TK("@str"): return to_object(prev().value);
  1113. case TK("True"): return VAR(true);
  1114. case TK("False"): return VAR(false);
  1115. case TK("None"): return vm->None;
  1116. case TK("..."): return vm->Ellipsis;
  1117. case TK("("): {
  1118. List cpnts;
  1119. while(true) {
  1120. cpnts.push_back(read_literal());
  1121. if(curr().type == TK(")")) break;
  1122. consume(TK(","));
  1123. if(curr().type == TK(")")) break;
  1124. }
  1125. consume(TK(")"));
  1126. return VAR(Tuple(std::move(cpnts)));
  1127. }
  1128. default: break;
  1129. }
  1130. return nullptr;
  1131. }
  1132. Compiler::Compiler(VM* vm, std::string_view source, const Str& filename, CompileMode mode, bool unknown_global_scope)
  1133. :lexer(vm, std::make_shared<SourceData>(source, filename, mode)){
  1134. this->vm = vm;
  1135. this->unknown_global_scope = unknown_global_scope;
  1136. init_pratt_rules();
  1137. }
  1138. Str Compiler::precompile(){
  1139. auto tokens = lexer.run();
  1140. SStream ss;
  1141. ss << "pkpy:" PK_VERSION << '\n'; // L1: version string
  1142. ss << (int)mode() << '\n'; // L2: mode
  1143. std::map<std::string_view, int> token_indices;
  1144. for(auto token: tokens){
  1145. if(is_raw_string_used(token.type)){
  1146. auto it = token_indices.find(token.sv());
  1147. if(it == token_indices.end()){
  1148. token_indices[token.sv()] = 0;
  1149. // assert no '\n' in token.sv()
  1150. for(char c: token.sv()) if(c=='\n') PK_FATAL_ERROR();
  1151. }
  1152. }
  1153. }
  1154. ss << "=" << (int)token_indices.size() << '\n'; // L3: raw string count
  1155. int index = 0;
  1156. for(auto& kv: token_indices){
  1157. ss << kv.first << '\n'; // L4: raw strings
  1158. kv.second = index++;
  1159. }
  1160. ss << "=" << (int)tokens.size() << '\n'; // L5: token count
  1161. for(int i=0; i<tokens.size(); i++){
  1162. const Token& token = tokens[i];
  1163. ss << (int)token.type << ',';
  1164. if(is_raw_string_used(token.type)){
  1165. ss << token_indices[token.sv()] << ',';
  1166. }
  1167. if(i>0 && tokens[i-1].line == token.line) ss << ',';
  1168. else ss << token.line << ',';
  1169. if(i>0 && tokens[i-1].brackets_level == token.brackets_level) ss << ',';
  1170. else ss << token.brackets_level << ',';
  1171. // visit token value
  1172. std::visit([&ss](auto&& arg){
  1173. using T = std::decay_t<decltype(arg)>;
  1174. if constexpr(std::is_same_v<T, i64>){
  1175. ss << 'I' << arg;
  1176. }else if constexpr(std::is_same_v<T, f64>){
  1177. ss << 'F' << arg;
  1178. }else if constexpr(std::is_same_v<T, Str>){
  1179. ss << 'S';
  1180. for(char c: arg) ss.write_hex((unsigned char)c);
  1181. }
  1182. ss << '\n';
  1183. }, token.value);
  1184. }
  1185. return ss.str();
  1186. }
  1187. void Compiler::from_precompiled(const char* source){
  1188. TokenDeserializer deserializer(source);
  1189. deserializer.curr += 5; // skip "pkpy:"
  1190. std::string_view version = deserializer.read_string('\n');
  1191. if(version != PK_VERSION){
  1192. SyntaxError(_S("precompiled version mismatch: ", version, "!=" PK_VERSION));
  1193. }
  1194. if(deserializer.read_uint('\n') != (i64)mode()){
  1195. SyntaxError("precompiled mode mismatch");
  1196. }
  1197. int count = deserializer.read_count();
  1198. std::vector<Str>& precompiled_tokens = lexer.src->_precompiled_tokens;
  1199. for(int i=0; i<count; i++){
  1200. precompiled_tokens.push_back(deserializer.read_string('\n'));
  1201. }
  1202. count = deserializer.read_count();
  1203. for(int i=0; i<count; i++){
  1204. Token t;
  1205. t.type = (unsigned char)deserializer.read_uint(',');
  1206. if(is_raw_string_used(t.type)){
  1207. i64 index = deserializer.read_uint(',');
  1208. t.start = precompiled_tokens[index].c_str();
  1209. t.length = precompiled_tokens[index].size;
  1210. }else{
  1211. t.start = nullptr;
  1212. t.length = 0;
  1213. }
  1214. if(deserializer.match_char(',')){
  1215. t.line = tokens.back().line;
  1216. }else{
  1217. t.line = (int)deserializer.read_uint(',');
  1218. }
  1219. if(deserializer.match_char(',')){
  1220. t.brackets_level = tokens.back().brackets_level;
  1221. }else{
  1222. t.brackets_level = (int)deserializer.read_uint(',');
  1223. }
  1224. char type = deserializer.read_char();
  1225. switch(type){
  1226. case 'I': t.value = deserializer.read_uint('\n'); break;
  1227. case 'F': t.value = deserializer.read_float('\n'); break;
  1228. case 'S': t.value = deserializer.read_string_from_hex('\n'); break;
  1229. default: t.value = {}; break;
  1230. }
  1231. tokens.push_back(t);
  1232. }
  1233. }
  1234. CodeObject_ Compiler::compile(){
  1235. PK_ASSERT(i == 0) // make sure it is the first time to compile
  1236. if(lexer.src->is_precompiled){
  1237. from_precompiled(lexer.src->source.c_str());
  1238. }else{
  1239. this->tokens = lexer.run();
  1240. }
  1241. CodeObject_ code = push_global_context();
  1242. advance(); // skip @sof, so prev() is always valid
  1243. match_newlines(); // skip possible leading '\n'
  1244. if(mode()==EVAL_MODE) {
  1245. EXPR_TUPLE(); ctx()->emit_expr();
  1246. consume(TK("@eof"));
  1247. ctx()->emit_(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  1248. pop_context();
  1249. return code;
  1250. }else if(mode()==JSON_MODE){
  1251. EXPR();
  1252. Expr_ e = ctx()->s_expr.popx();
  1253. if(!e->is_json_object()) SyntaxError("expect a JSON object, literal or array");
  1254. consume(TK("@eof"));
  1255. e->emit_(ctx());
  1256. ctx()->emit_(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  1257. pop_context();
  1258. return code;
  1259. }
  1260. while (!match(TK("@eof"))) {
  1261. compile_stmt();
  1262. match_newlines();
  1263. }
  1264. pop_context();
  1265. return code;
  1266. }
  1267. // TODO: refactor this
  1268. void Lexer::throw_err(StrName type, Str msg, int lineno, const char* cursor){
  1269. PyObject* e_obj = vm->call(vm->builtins->attr(type), VAR(msg));
  1270. Exception& e = PK_OBJ_GET(Exception, e_obj);
  1271. e.st_push(src, lineno, cursor, "");
  1272. throw e;
  1273. }
  1274. std::string_view TokenDeserializer::read_string(char c){
  1275. const char* start = curr;
  1276. while(*curr != c) curr++;
  1277. std::string_view retval(start, curr-start);
  1278. curr++; // skip the delimiter
  1279. return retval;
  1280. }
  1281. Str TokenDeserializer::read_string_from_hex(char c){
  1282. std::string_view s = read_string(c);
  1283. char* buffer = (char*)pool64_alloc(s.size()/2 + 1);
  1284. for(int i=0; i<s.size(); i+=2){
  1285. char c = 0;
  1286. if(s[i]>='0' && s[i]<='9') c += s[i]-'0';
  1287. else if(s[i]>='a' && s[i]<='f') c += s[i]-'a'+10;
  1288. else PK_FATAL_ERROR();
  1289. c <<= 4;
  1290. if(s[i+1]>='0' && s[i+1]<='9') c += s[i+1]-'0';
  1291. else if(s[i+1]>='a' && s[i+1]<='f') c += s[i+1]-'a'+10;
  1292. else PK_FATAL_ERROR();
  1293. buffer[i/2] = c;
  1294. }
  1295. buffer[s.size()/2] = 0;
  1296. return std::pair<char*, int>(buffer, s.size()/2);
  1297. }
  1298. int TokenDeserializer::read_count(){
  1299. PK_ASSERT(*curr == '=')
  1300. curr++;
  1301. return read_uint('\n');
  1302. }
  1303. i64 TokenDeserializer::read_uint(char c){
  1304. i64 out = 0;
  1305. while(*curr != c){
  1306. out = out*10 + (*curr-'0');
  1307. curr++;
  1308. }
  1309. curr++; // skip the delimiter
  1310. return out;
  1311. }
  1312. f64 TokenDeserializer::read_float(char c){
  1313. std::string_view sv = read_string(c);
  1314. return std::stod(std::string(sv));
  1315. }
  1316. } // namespace pkpy